/* st_glsl_to_tgsi.cpp, revision 39348bf79fb247eec895c93e52f23afe138be46a */
1/* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright © 2010 Intel Corporation 5 * Copyright © 2011 Bryan Cain 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 */ 26 27/** 28 * \file glsl_to_tgsi.cpp 29 * 30 * Translate GLSL IR to TGSI. 
31 */ 32 33#include <stdio.h> 34#include "main/compiler.h" 35#include "ir.h" 36#include "ir_visitor.h" 37#include "ir_print_visitor.h" 38#include "ir_expression_flattening.h" 39#include "glsl_types.h" 40#include "glsl_parser_extras.h" 41#include "../glsl/program.h" 42#include "ir_optimization.h" 43#include "ast.h" 44 45extern "C" { 46#include "main/mtypes.h" 47#include "main/shaderapi.h" 48#include "main/shaderobj.h" 49#include "main/uniforms.h" 50#include "program/hash_table.h" 51#include "program/prog_instruction.h" 52#include "program/prog_optimize.h" 53#include "program/prog_print.h" 54#include "program/program.h" 55#include "program/prog_uniform.h" 56#include "program/prog_parameter.h" 57#include "program/sampler.h" 58 59#include "pipe/p_compiler.h" 60#include "pipe/p_context.h" 61#include "pipe/p_screen.h" 62#include "pipe/p_shader_tokens.h" 63#include "pipe/p_state.h" 64#include "util/u_math.h" 65#include "tgsi/tgsi_ureg.h" 66#include "tgsi/tgsi_info.h" 67#include "st_context.h" 68#include "st_program.h" 69#include "st_glsl_to_tgsi.h" 70#include "st_mesa_to_tgsi.h" 71} 72 73#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX 74#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 75 (1 << PROGRAM_ENV_PARAM) | \ 76 (1 << PROGRAM_STATE_VAR) | \ 77 (1 << PROGRAM_NAMED_PARAM) | \ 78 (1 << PROGRAM_CONSTANT) | \ 79 (1 << PROGRAM_UNIFORM)) 80 81#define MAX_TEMPS 4096 82 83/* will be 4 for GLSL 4.00 */ 84#define MAX_GLSL_TEXTURE_OFFSET 1 85 86class st_src_reg; 87class st_dst_reg; 88 89static int swizzle_for_size(int size); 90 91/** 92 * This struct is a corresponding struct to TGSI ureg_src. 
93 */ 94class st_src_reg { 95public: 96 st_src_reg(gl_register_file file, int index, const glsl_type *type) 97 { 98 this->file = file; 99 this->index = index; 100 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 101 this->swizzle = swizzle_for_size(type->vector_elements); 102 else 103 this->swizzle = SWIZZLE_XYZW; 104 this->negate = 0; 105 this->type = type ? type->base_type : GLSL_TYPE_ERROR; 106 this->reladdr = NULL; 107 } 108 109 st_src_reg(gl_register_file file, int index, int type) 110 { 111 this->type = type; 112 this->file = file; 113 this->index = index; 114 this->swizzle = SWIZZLE_XYZW; 115 this->negate = 0; 116 this->reladdr = NULL; 117 } 118 119 st_src_reg() 120 { 121 this->type = GLSL_TYPE_ERROR; 122 this->file = PROGRAM_UNDEFINED; 123 this->index = 0; 124 this->swizzle = 0; 125 this->negate = 0; 126 this->reladdr = NULL; 127 } 128 129 explicit st_src_reg(st_dst_reg reg); 130 131 gl_register_file file; /**< PROGRAM_* from Mesa */ 132 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 133 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 134 int negate; /**< NEGATE_XYZW mask from mesa */ 135 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 136 /** Register index should be offset by the integer in this reg. */ 137 st_src_reg *reladdr; 138}; 139 140class st_dst_reg { 141public: 142 st_dst_reg(gl_register_file file, int writemask, int type) 143 { 144 this->file = file; 145 this->index = 0; 146 this->writemask = writemask; 147 this->cond_mask = COND_TR; 148 this->reladdr = NULL; 149 this->type = type; 150 } 151 152 st_dst_reg() 153 { 154 this->type = GLSL_TYPE_ERROR; 155 this->file = PROGRAM_UNDEFINED; 156 this->index = 0; 157 this->writemask = 0; 158 this->cond_mask = COND_TR; 159 this->reladdr = NULL; 160 } 161 162 explicit st_dst_reg(st_src_reg reg); 163 164 gl_register_file file; /**< PROGRAM_* from Mesa */ 165 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. 
*/ 166 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 167 GLuint cond_mask:4; 168 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 169 /** Register index should be offset by the integer in this reg. */ 170 st_src_reg *reladdr; 171}; 172 173st_src_reg::st_src_reg(st_dst_reg reg) 174{ 175 this->type = reg.type; 176 this->file = reg.file; 177 this->index = reg.index; 178 this->swizzle = SWIZZLE_XYZW; 179 this->negate = 0; 180 this->reladdr = reg.reladdr; 181} 182 183st_dst_reg::st_dst_reg(st_src_reg reg) 184{ 185 this->type = reg.type; 186 this->file = reg.file; 187 this->index = reg.index; 188 this->writemask = WRITEMASK_XYZW; 189 this->cond_mask = COND_TR; 190 this->reladdr = reg.reladdr; 191} 192 193class glsl_to_tgsi_instruction : public exec_node { 194public: 195 /* Callers of this ralloc-based new need not call delete. It's 196 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 197 static void* operator new(size_t size, void *ctx) 198 { 199 void *node; 200 201 node = rzalloc_size(ctx, size); 202 assert(node != NULL); 203 204 return node; 205 } 206 207 unsigned op; 208 st_dst_reg dst; 209 st_src_reg src[3]; 210 /** Pointer to the ir source this tree came from for debugging */ 211 ir_instruction *ir; 212 GLboolean cond_update; 213 bool saturate; 214 int sampler; /**< sampler index */ 215 int tex_target; /**< One of TEXTURE_*_INDEX */ 216 GLboolean tex_shadow; 217 struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; 218 unsigned tex_offset_num_offset; 219 int dead_mask; /**< Used in dead code elimination */ 220 221 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ 222}; 223 224class variable_storage : public exec_node { 225public: 226 variable_storage(ir_variable *var, gl_register_file file, int index) 227 : file(file), index(index), var(var) 228 { 229 /* empty */ 230 } 231 232 gl_register_file file; 233 int index; 234 ir_variable *var; /* variable that maps to this, if any */ 235}; 236 
237class immediate_storage : public exec_node { 238public: 239 immediate_storage(gl_constant_value *values, int size, int type) 240 { 241 memcpy(this->values, values, size * sizeof(gl_constant_value)); 242 this->size = size; 243 this->type = type; 244 } 245 246 gl_constant_value values[4]; 247 int size; /**< Number of components (1-4) */ 248 int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ 249}; 250 251class function_entry : public exec_node { 252public: 253 ir_function_signature *sig; 254 255 /** 256 * identifier of this function signature used by the program. 257 * 258 * At the point that TGSI instructions for function calls are 259 * generated, we don't know the address of the first instruction of 260 * the function body. So we make the BranchTarget that is called a 261 * small integer and rewrite them during set_branchtargets(). 262 */ 263 int sig_id; 264 265 /** 266 * Pointer to first instruction of the function body. 267 * 268 * Set during function body emits after main() is processed. 269 */ 270 glsl_to_tgsi_instruction *bgn_inst; 271 272 /** 273 * Index of the first instruction of the function body in actual TGSI. 274 * 275 * Set after conversion from glsl_to_tgsi_instruction to TGSI. 276 */ 277 int inst; 278 279 /** Storage for the return value. 
*/ 280 st_src_reg return_reg; 281}; 282 283class glsl_to_tgsi_visitor : public ir_visitor { 284public: 285 glsl_to_tgsi_visitor(); 286 ~glsl_to_tgsi_visitor(); 287 288 function_entry *current_function; 289 290 struct gl_context *ctx; 291 struct gl_program *prog; 292 struct gl_shader_program *shader_program; 293 struct gl_shader_compiler_options *options; 294 295 int next_temp; 296 297 int num_address_regs; 298 int samplers_used; 299 bool indirect_addr_temps; 300 bool indirect_addr_consts; 301 302 int glsl_version; 303 bool native_integers; 304 305 variable_storage *find_variable_storage(ir_variable *var); 306 307 int add_constant(gl_register_file file, gl_constant_value values[4], 308 int size, int datatype, GLuint *swizzle_out); 309 310 function_entry *get_function_signature(ir_function_signature *sig); 311 312 st_src_reg get_temp(const glsl_type *type); 313 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); 314 315 st_src_reg st_src_reg_for_float(float val); 316 st_src_reg st_src_reg_for_int(int val); 317 st_src_reg st_src_reg_for_type(int type, int val); 318 319 /** 320 * \name Visit methods 321 * 322 * As typical for the visitor pattern, there must be one \c visit method for 323 * each concrete subclass of \c ir_instruction. Virtual base classes within 324 * the hierarchy should not have \c visit methods. 
325 */ 326 /*@{*/ 327 virtual void visit(ir_variable *); 328 virtual void visit(ir_loop *); 329 virtual void visit(ir_loop_jump *); 330 virtual void visit(ir_function_signature *); 331 virtual void visit(ir_function *); 332 virtual void visit(ir_expression *); 333 virtual void visit(ir_swizzle *); 334 virtual void visit(ir_dereference_variable *); 335 virtual void visit(ir_dereference_array *); 336 virtual void visit(ir_dereference_record *); 337 virtual void visit(ir_assignment *); 338 virtual void visit(ir_constant *); 339 virtual void visit(ir_call *); 340 virtual void visit(ir_return *); 341 virtual void visit(ir_discard *); 342 virtual void visit(ir_texture *); 343 virtual void visit(ir_if *); 344 /*@}*/ 345 346 st_src_reg result; 347 348 /** List of variable_storage */ 349 exec_list variables; 350 351 /** List of immediate_storage */ 352 exec_list immediates; 353 int num_immediates; 354 355 /** List of function_entry */ 356 exec_list function_signatures; 357 int next_signature_id; 358 359 /** List of glsl_to_tgsi_instruction */ 360 exec_list instructions; 361 362 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); 363 364 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 365 st_dst_reg dst, st_src_reg src0); 366 367 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 368 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 369 370 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 371 st_dst_reg dst, 372 st_src_reg src0, st_src_reg src1, st_src_reg src2); 373 374 unsigned get_opcode(ir_instruction *ir, unsigned op, 375 st_dst_reg dst, 376 st_src_reg src0, st_src_reg src1); 377 378 /** 379 * Emit the correct dot-product instruction for the type of arguments 380 */ 381 glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, 382 st_dst_reg dst, 383 st_src_reg src0, 384 st_src_reg src1, 385 unsigned elements); 386 387 void emit_scalar(ir_instruction *ir, unsigned op, 388 st_dst_reg dst, st_src_reg src0); 389 390 
void emit_scalar(ir_instruction *ir, unsigned op, 391 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 392 393 void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst); 394 395 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); 396 397 void emit_scs(ir_instruction *ir, unsigned op, 398 st_dst_reg dst, const st_src_reg &src); 399 400 bool try_emit_mad(ir_expression *ir, 401 int mul_operand); 402 bool try_emit_mad_for_and_not(ir_expression *ir, 403 int mul_operand); 404 bool try_emit_sat(ir_expression *ir); 405 406 void emit_swz(ir_expression *ir); 407 408 bool process_move_condition(ir_rvalue *ir); 409 410 void remove_output_reads(gl_register_file type); 411 void simplify_cmp(void); 412 413 void rename_temp_register(int index, int new_index); 414 int get_first_temp_read(int index); 415 int get_first_temp_write(int index); 416 int get_last_temp_read(int index); 417 int get_last_temp_write(int index); 418 419 void copy_propagate(void); 420 void eliminate_dead_code(void); 421 int eliminate_dead_code_advanced(void); 422 void merge_registers(void); 423 void renumber_registers(void); 424 425 void *mem_ctx; 426}; 427 428static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); 429 430static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); 431 432static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); 433 434static void 435fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); 436 437static void 438fail_link(struct gl_shader_program *prog, const char *fmt, ...) 
439{ 440 va_list args; 441 va_start(args, fmt); 442 ralloc_vasprintf_append(&prog->InfoLog, fmt, args); 443 va_end(args); 444 445 prog->LinkStatus = GL_FALSE; 446} 447 448static int 449swizzle_for_size(int size) 450{ 451 int size_swizzles[4] = { 452 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 453 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 454 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 455 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 456 }; 457 458 assert((size >= 1) && (size <= 4)); 459 return size_swizzles[size - 1]; 460} 461 462static bool 463is_tex_instruction(unsigned opcode) 464{ 465 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 466 return info->is_tex; 467} 468 469static unsigned 470num_inst_dst_regs(unsigned opcode) 471{ 472 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 473 return info->num_dst; 474} 475 476static unsigned 477num_inst_src_regs(unsigned opcode) 478{ 479 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 480 return info->is_tex ? info->num_src - 1 : info->num_src; 481} 482 483glsl_to_tgsi_instruction * 484glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 485 st_dst_reg dst, 486 st_src_reg src0, st_src_reg src1, st_src_reg src2) 487{ 488 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); 489 int num_reladdr = 0, i; 490 491 op = get_opcode(ir, op, dst, src0, src1); 492 493 /* If we have to do relative addressing, we want to load the ARL 494 * reg directly for one of the regs, and preload the other reladdr 495 * sources into temps. 
496 */ 497 num_reladdr += dst.reladdr != NULL; 498 num_reladdr += src0.reladdr != NULL; 499 num_reladdr += src1.reladdr != NULL; 500 num_reladdr += src2.reladdr != NULL; 501 502 reladdr_to_temp(ir, &src2, &num_reladdr); 503 reladdr_to_temp(ir, &src1, &num_reladdr); 504 reladdr_to_temp(ir, &src0, &num_reladdr); 505 506 if (dst.reladdr) { 507 emit_arl(ir, address_reg, *dst.reladdr); 508 num_reladdr--; 509 } 510 assert(num_reladdr == 0); 511 512 inst->op = op; 513 inst->dst = dst; 514 inst->src[0] = src0; 515 inst->src[1] = src1; 516 inst->src[2] = src2; 517 inst->ir = ir; 518 inst->dead_mask = 0; 519 520 inst->function = NULL; 521 522 if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL) 523 this->num_address_regs = 1; 524 525 /* Update indirect addressing status used by TGSI */ 526 if (dst.reladdr) { 527 switch(dst.file) { 528 case PROGRAM_TEMPORARY: 529 this->indirect_addr_temps = true; 530 break; 531 case PROGRAM_LOCAL_PARAM: 532 case PROGRAM_ENV_PARAM: 533 case PROGRAM_STATE_VAR: 534 case PROGRAM_NAMED_PARAM: 535 case PROGRAM_CONSTANT: 536 case PROGRAM_UNIFORM: 537 this->indirect_addr_consts = true; 538 break; 539 case PROGRAM_IMMEDIATE: 540 assert(!"immediates should not have indirect addressing"); 541 break; 542 default: 543 break; 544 } 545 } 546 else { 547 for (i=0; i<3; i++) { 548 if(inst->src[i].reladdr) { 549 switch(inst->src[i].file) { 550 case PROGRAM_TEMPORARY: 551 this->indirect_addr_temps = true; 552 break; 553 case PROGRAM_LOCAL_PARAM: 554 case PROGRAM_ENV_PARAM: 555 case PROGRAM_STATE_VAR: 556 case PROGRAM_NAMED_PARAM: 557 case PROGRAM_CONSTANT: 558 case PROGRAM_UNIFORM: 559 this->indirect_addr_consts = true; 560 break; 561 case PROGRAM_IMMEDIATE: 562 assert(!"immediates should not have indirect addressing"); 563 break; 564 default: 565 break; 566 } 567 } 568 } 569 } 570 571 this->instructions.push_tail(inst); 572 573 if (native_integers) 574 try_emit_float_set(ir, op, dst); 575 576 return inst; 577} 578 579 580glsl_to_tgsi_instruction * 
581glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 582 st_dst_reg dst, st_src_reg src0, st_src_reg src1) 583{ 584 return emit(ir, op, dst, src0, src1, undef_src); 585} 586 587glsl_to_tgsi_instruction * 588glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 589 st_dst_reg dst, st_src_reg src0) 590{ 591 assert(dst.writemask != 0); 592 return emit(ir, op, dst, src0, undef_src, undef_src); 593} 594 595glsl_to_tgsi_instruction * 596glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) 597{ 598 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 599} 600 601 /** 602 * Emits the code to convert the result of float SET instructions to integers. 603 */ 604void 605glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op, 606 st_dst_reg dst) 607{ 608 if ((op == TGSI_OPCODE_SEQ || 609 op == TGSI_OPCODE_SNE || 610 op == TGSI_OPCODE_SGE || 611 op == TGSI_OPCODE_SLT)) 612 { 613 st_src_reg src = st_src_reg(dst); 614 src.negate = ~src.negate; 615 dst.type = GLSL_TYPE_FLOAT; 616 emit(ir, TGSI_OPCODE_F2I, dst, src); 617 } 618} 619 620/** 621 * Determines whether to use an integer, unsigned integer, or float opcode 622 * based on the operands and input opcode, then emits the result. 623 */ 624unsigned 625glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, 626 st_dst_reg dst, 627 st_src_reg src0, st_src_reg src1) 628{ 629 int type = GLSL_TYPE_FLOAT; 630 631 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) 632 type = GLSL_TYPE_FLOAT; 633 else if (native_integers) 634 type = src0.type == GLSL_TYPE_BOOL ? 
GLSL_TYPE_INT : src0.type; 635 636#define case4(c, f, i, u) \ 637 case TGSI_OPCODE_##c: \ 638 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ 639 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ 640 else op = TGSI_OPCODE_##f; \ 641 break; 642#define case3(f, i, u) case4(f, f, i, u) 643#define case2fi(f, i) case4(f, f, i, i) 644#define case2iu(i, u) case4(i, LAST, i, u) 645 646 switch(op) { 647 case2fi(ADD, UADD); 648 case2fi(MUL, UMUL); 649 case2fi(MAD, UMAD); 650 case3(DIV, IDIV, UDIV); 651 case3(MAX, IMAX, UMAX); 652 case3(MIN, IMIN, UMIN); 653 case2iu(MOD, UMOD); 654 655 case2fi(SEQ, USEQ); 656 case2fi(SNE, USNE); 657 case3(SGE, ISGE, USGE); 658 case3(SLT, ISLT, USLT); 659 660 case2iu(ISHR, USHR); 661 662 default: break; 663 } 664 665 assert(op != TGSI_OPCODE_LAST); 666 return op; 667} 668 669glsl_to_tgsi_instruction * 670glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, 671 st_dst_reg dst, st_src_reg src0, st_src_reg src1, 672 unsigned elements) 673{ 674 static const unsigned dot_opcodes[] = { 675 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 676 }; 677 678 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 679} 680 681/** 682 * Emits TGSI scalar opcodes to produce unique answers across channels. 683 * 684 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X 685 * channel determines the result across all channels. So to do a vec4 686 * of this operation, we want to emit a scalar per source channel used 687 * to produce dest channels. 688 */ 689void 690glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 691 st_dst_reg dst, 692 st_src_reg orig_src0, st_src_reg orig_src1) 693{ 694 int i, j; 695 int done_mask = ~dst.writemask; 696 697 /* TGSI RCP is a scalar operation splatting results to all channels, 698 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 699 * dst channels. 
700 */ 701 for (i = 0; i < 4; i++) { 702 GLuint this_mask = (1 << i); 703 glsl_to_tgsi_instruction *inst; 704 st_src_reg src0 = orig_src0; 705 st_src_reg src1 = orig_src1; 706 707 if (done_mask & this_mask) 708 continue; 709 710 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 711 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 712 for (j = i + 1; j < 4; j++) { 713 /* If there is another enabled component in the destination that is 714 * derived from the same inputs, generate its value on this pass as 715 * well. 716 */ 717 if (!(done_mask & (1 << j)) && 718 GET_SWZ(src0.swizzle, j) == src0_swiz && 719 GET_SWZ(src1.swizzle, j) == src1_swiz) { 720 this_mask |= (1 << j); 721 } 722 } 723 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 724 src0_swiz, src0_swiz); 725 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 726 src1_swiz, src1_swiz); 727 728 inst = emit(ir, op, dst, src0, src1); 729 inst->dst.writemask = this_mask; 730 done_mask |= this_mask; 731 } 732} 733 734void 735glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 736 st_dst_reg dst, st_src_reg src0) 737{ 738 st_src_reg undef = undef_src; 739 740 undef.swizzle = SWIZZLE_XXXX; 741 742 emit_scalar(ir, op, dst, src0, undef); 743} 744 745void 746glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 747 st_dst_reg dst, st_src_reg src0) 748{ 749 int op = TGSI_OPCODE_ARL; 750 751 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) 752 op = TGSI_OPCODE_UARL; 753 754 emit(NULL, op, dst, src0); 755} 756 757/** 758 * Emit an TGSI_OPCODE_SCS instruction 759 * 760 * The \c SCS opcode functions a bit differently than the other TGSI opcodes. 761 * Instead of splatting its result across all four components of the 762 * destination, it writes one value to the \c x component and another value to 763 * the \c y component. 764 * 765 * \param ir IR instruction being processed 766 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 767 * on which value is desired. 
768 * \param dst Destination register 769 * \param src Source register 770 */ 771void 772glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, 773 st_dst_reg dst, 774 const st_src_reg &src) 775{ 776 /* Vertex programs cannot use the SCS opcode. 777 */ 778 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { 779 emit_scalar(ir, op, dst, src); 780 return; 781 } 782 783 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; 784 const unsigned scs_mask = (1U << component); 785 int done_mask = ~dst.writemask; 786 st_src_reg tmp; 787 788 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); 789 790 /* If there are compnents in the destination that differ from the component 791 * that will be written by the SCS instrution, we'll need a temporary. 792 */ 793 if (scs_mask != unsigned(dst.writemask)) { 794 tmp = get_temp(glsl_type::vec4_type); 795 } 796 797 for (unsigned i = 0; i < 4; i++) { 798 unsigned this_mask = (1U << i); 799 st_src_reg src0 = src; 800 801 if ((done_mask & this_mask) != 0) 802 continue; 803 804 /* The source swizzle specified which component of the source generates 805 * sine / cosine for the current component in the destination. The SCS 806 * instruction requires that this value be swizzle to the X component. 807 * Replace the current swizzle with a swizzle that puts the source in 808 * the X component. 809 */ 810 unsigned src0_swiz = GET_SWZ(src.swizzle, i); 811 812 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 813 src0_swiz, src0_swiz); 814 for (unsigned j = i + 1; j < 4; j++) { 815 /* If there is another enabled component in the destination that is 816 * derived from the same inputs, generate its value on this pass as 817 * well. 818 */ 819 if (!(done_mask & (1 << j)) && 820 GET_SWZ(src0.swizzle, j) == src0_swiz) { 821 this_mask |= (1 << j); 822 } 823 } 824 825 if (this_mask != scs_mask) { 826 glsl_to_tgsi_instruction *inst; 827 st_dst_reg tmp_dst = st_dst_reg(tmp); 828 829 /* Emit the SCS instruction. 
830 */ 831 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); 832 inst->dst.writemask = scs_mask; 833 834 /* Move the result of the SCS instruction to the desired location in 835 * the destination. 836 */ 837 tmp.swizzle = MAKE_SWIZZLE4(component, component, 838 component, component); 839 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); 840 inst->dst.writemask = this_mask; 841 } else { 842 /* Emit the SCS instruction to write directly to the destination. 843 */ 844 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); 845 inst->dst.writemask = scs_mask; 846 } 847 848 done_mask |= this_mask; 849 } 850} 851 852int 853glsl_to_tgsi_visitor::add_constant(gl_register_file file, 854 gl_constant_value values[4], int size, int datatype, 855 GLuint *swizzle_out) 856{ 857 if (file == PROGRAM_CONSTANT) { 858 return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, 859 size, datatype, swizzle_out); 860 } else { 861 int index = 0; 862 immediate_storage *entry; 863 assert(file == PROGRAM_IMMEDIATE); 864 865 /* Search immediate storage to see if we already have an identical 866 * immediate that we can use instead of adding a duplicate entry. 867 */ 868 foreach_iter(exec_list_iterator, iter, this->immediates) { 869 entry = (immediate_storage *)iter.get(); 870 871 if (entry->size == size && 872 entry->type == datatype && 873 !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { 874 return index; 875 } 876 index++; 877 } 878 879 /* Add this immediate to the list. 
*/ 880 entry = new(mem_ctx) immediate_storage(values, size, datatype); 881 this->immediates.push_tail(entry); 882 this->num_immediates++; 883 return index; 884 } 885} 886 887st_src_reg 888glsl_to_tgsi_visitor::st_src_reg_for_float(float val) 889{ 890 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); 891 union gl_constant_value uval; 892 893 uval.f = val; 894 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); 895 896 return src; 897} 898 899st_src_reg 900glsl_to_tgsi_visitor::st_src_reg_for_int(int val) 901{ 902 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); 903 union gl_constant_value uval; 904 905 assert(native_integers); 906 907 uval.i = val; 908 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); 909 910 return src; 911} 912 913st_src_reg 914glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) 915{ 916 if (native_integers) 917 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 918 st_src_reg_for_int(val); 919 else 920 return st_src_reg_for_float(val); 921} 922 923static int 924type_size(const struct glsl_type *type) 925{ 926 unsigned int i; 927 int size; 928 929 switch (type->base_type) { 930 case GLSL_TYPE_UINT: 931 case GLSL_TYPE_INT: 932 case GLSL_TYPE_FLOAT: 933 case GLSL_TYPE_BOOL: 934 if (type->is_matrix()) { 935 return type->matrix_columns; 936 } else { 937 /* Regardless of size of vector, it gets a vec4. This is bad 938 * packing for things like floats, but otherwise arrays become a 939 * mess. Hopefully a later pass over the code can pack scalars 940 * down if appropriate. 941 */ 942 return 1; 943 } 944 case GLSL_TYPE_ARRAY: 945 assert(type->length > 0); 946 return type_size(type->fields.array) * type->length; 947 case GLSL_TYPE_STRUCT: 948 size = 0; 949 for (i = 0; i < type->length; i++) { 950 size += type_size(type->fields.structure[i].type); 951 } 952 return size; 953 case GLSL_TYPE_SAMPLER: 954 /* Samplers take up one slot in UNIFORMS[], but they're baked in 955 * at link time. 
956 */ 957 return 1; 958 default: 959 assert(0); 960 return 0; 961 } 962} 963 964/** 965 * In the initial pass of codegen, we assign temporary numbers to 966 * intermediate results. (not SSA -- variable assignments will reuse 967 * storage). 968 */ 969st_src_reg 970glsl_to_tgsi_visitor::get_temp(const glsl_type *type) 971{ 972 st_src_reg src; 973 974 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; 975 src.file = PROGRAM_TEMPORARY; 976 src.index = next_temp; 977 src.reladdr = NULL; 978 next_temp += type_size(type); 979 980 if (type->is_array() || type->is_record()) { 981 src.swizzle = SWIZZLE_NOOP; 982 } else { 983 src.swizzle = swizzle_for_size(type->vector_elements); 984 } 985 src.negate = 0; 986 987 return src; 988} 989 990variable_storage * 991glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 992{ 993 994 variable_storage *entry; 995 996 foreach_iter(exec_list_iterator, iter, this->variables) { 997 entry = (variable_storage *)iter.get(); 998 999 if (entry->var == var) 1000 return entry; 1001 } 1002 1003 return NULL; 1004} 1005 1006void 1007glsl_to_tgsi_visitor::visit(ir_variable *ir) 1008{ 1009 if (strcmp(ir->name, "gl_FragCoord") == 0) { 1010 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 1011 1012 fp->OriginUpperLeft = ir->origin_upper_left; 1013 fp->PixelCenterInteger = ir->pixel_center_integer; 1014 1015 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 1016 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 1017 switch (ir->depth_layout) { 1018 case ir_depth_layout_none: 1019 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; 1020 break; 1021 case ir_depth_layout_any: 1022 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; 1023 break; 1024 case ir_depth_layout_greater: 1025 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; 1026 break; 1027 case ir_depth_layout_less: 1028 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; 1029 break; 1030 case ir_depth_layout_unchanged: 1031 fp->FragDepthLayout 
= FRAG_DEPTH_LAYOUT_UNCHANGED; 1032 break; 1033 default: 1034 assert(0); 1035 break; 1036 } 1037 } 1038 1039 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 1040 unsigned int i; 1041 const ir_state_slot *const slots = ir->state_slots; 1042 assert(ir->state_slots != NULL); 1043 1044 /* Check if this statevar's setup in the STATE file exactly 1045 * matches how we'll want to reference it as a 1046 * struct/array/whatever. If not, then we need to move it into 1047 * temporary storage and hope that it'll get copy-propagated 1048 * out. 1049 */ 1050 for (i = 0; i < ir->num_state_slots; i++) { 1051 if (slots[i].swizzle != SWIZZLE_XYZW) { 1052 break; 1053 } 1054 } 1055 1056 variable_storage *storage; 1057 st_dst_reg dst; 1058 if (i == ir->num_state_slots) { 1059 /* We'll set the index later. */ 1060 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 1061 this->variables.push_tail(storage); 1062 1063 dst = undef_dst; 1064 } else { 1065 /* The variable_storage constructor allocates slots based on the size 1066 * of the type. However, this had better match the number of state 1067 * elements that we're going to copy into the new temporary. 1068 */ 1069 assert((int) ir->num_state_slots == type_size(ir->type)); 1070 1071 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 1072 this->next_temp); 1073 this->variables.push_tail(storage); 1074 this->next_temp += type_size(ir->type); 1075 1076 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, 1077 native_integers ? 
ir->type->base_type : GLSL_TYPE_FLOAT)); 1078 } 1079 1080 1081 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 1082 int index = _mesa_add_state_reference(this->prog->Parameters, 1083 (gl_state_index *)slots[i].tokens); 1084 1085 if (storage->file == PROGRAM_STATE_VAR) { 1086 if (storage->index == -1) { 1087 storage->index = index; 1088 } else { 1089 assert(index == storage->index + (int)i); 1090 } 1091 } else { 1092 st_src_reg src(PROGRAM_STATE_VAR, index, 1093 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); 1094 src.swizzle = slots[i].swizzle; 1095 emit(ir, TGSI_OPCODE_MOV, dst, src); 1096 /* even a float takes up a whole vec4 reg in a struct/array. */ 1097 dst.index++; 1098 } 1099 } 1100 1101 if (storage->file == PROGRAM_TEMPORARY && 1102 dst.index != storage->index + (int) ir->num_state_slots) { 1103 fail_link(this->shader_program, 1104 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1105 ir->name, dst.index - storage->index, 1106 type_size(ir->type)); 1107 } 1108 } 1109} 1110 1111void 1112glsl_to_tgsi_visitor::visit(ir_loop *ir) 1113{ 1114 ir_dereference_variable *counter = NULL; 1115 1116 if (ir->counter != NULL) 1117 counter = new(ir) ir_dereference_variable(ir->counter); 1118 1119 if (ir->from != NULL) { 1120 assert(ir->counter != NULL); 1121 1122 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 1123 1124 a->accept(this); 1125 delete a; 1126 } 1127 1128 emit(NULL, TGSI_OPCODE_BGNLOOP); 1129 1130 if (ir->to) { 1131 ir_expression *e = 1132 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 1133 counter, ir->to); 1134 ir_if *if_stmt = new(ir) ir_if(e); 1135 1136 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 1137 1138 if_stmt->then_instructions.push_tail(brk); 1139 1140 if_stmt->accept(this); 1141 1142 delete if_stmt; 1143 delete e; 1144 delete brk; 1145 } 1146 1147 visit_exec_list(&ir->body_instructions, this); 1148 1149 if (ir->increment) { 1150 ir_expression *e = 1151 new(ir) 
ir_expression(ir_binop_add, counter->type, 1152 counter, ir->increment); 1153 1154 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 1155 1156 a->accept(this); 1157 delete a; 1158 delete e; 1159 } 1160 1161 emit(NULL, TGSI_OPCODE_ENDLOOP); 1162} 1163 1164void 1165glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 1166{ 1167 switch (ir->mode) { 1168 case ir_loop_jump::jump_break: 1169 emit(NULL, TGSI_OPCODE_BRK); 1170 break; 1171 case ir_loop_jump::jump_continue: 1172 emit(NULL, TGSI_OPCODE_CONT); 1173 break; 1174 } 1175} 1176 1177 1178void 1179glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1180{ 1181 assert(0); 1182 (void)ir; 1183} 1184 1185void 1186glsl_to_tgsi_visitor::visit(ir_function *ir) 1187{ 1188 /* Ignore function bodies other than main() -- we shouldn't see calls to 1189 * them since they should all be inlined before we get to glsl_to_tgsi. 1190 */ 1191 if (strcmp(ir->name, "main") == 0) { 1192 const ir_function_signature *sig; 1193 exec_list empty; 1194 1195 sig = ir->matching_signature(&empty); 1196 1197 assert(sig); 1198 1199 foreach_iter(exec_list_iterator, iter, sig->body) { 1200 ir_instruction *ir = (ir_instruction *)iter.get(); 1201 1202 ir->accept(this); 1203 } 1204 } 1205} 1206 1207bool 1208glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1209{ 1210 int nonmul_operand = 1 - mul_operand; 1211 st_src_reg a, b, c; 1212 st_dst_reg result_dst; 1213 1214 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1215 if (!expr || expr->operation != ir_binop_mul) 1216 return false; 1217 1218 expr->operands[0]->accept(this); 1219 a = this->result; 1220 expr->operands[1]->accept(this); 1221 b = this->result; 1222 ir->operands[nonmul_operand]->accept(this); 1223 c = this->result; 1224 1225 this->result = get_temp(ir->type); 1226 result_dst = st_dst_reg(this->result); 1227 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1228 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1229 1230 return true; 1231} 
1232 1233/** 1234 * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) 1235 * 1236 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 1237 * implemented using multiplication, and logical-or is implemented using 1238 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 1239 * As result, the logical expression (a & !b) can be rewritten as: 1240 * 1241 * - a * !b 1242 * - a * (1 - b) 1243 * - (a * 1) - (a * b) 1244 * - a + -(a * b) 1245 * - a + (a * -b) 1246 * 1247 * This final expression can be implemented as a single MAD(a, -b, a) 1248 * instruction. 1249 */ 1250bool 1251glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 1252{ 1253 const int other_operand = 1 - try_operand; 1254 st_src_reg a, b; 1255 1256 ir_expression *expr = ir->operands[try_operand]->as_expression(); 1257 if (!expr || expr->operation != ir_unop_logic_not) 1258 return false; 1259 1260 ir->operands[other_operand]->accept(this); 1261 a = this->result; 1262 expr->operands[0]->accept(this); 1263 b = this->result; 1264 1265 b.negate = ~b.negate; 1266 1267 this->result = get_temp(ir->type); 1268 emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); 1269 1270 return true; 1271} 1272 1273bool 1274glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) 1275{ 1276 /* Saturates were only introduced to vertex programs in 1277 * NV_vertex_program3, so don't give them to drivers in the VP. 1278 */ 1279 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) 1280 return false; 1281 1282 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 1283 if (!sat_src) 1284 return false; 1285 1286 sat_src->accept(this); 1287 st_src_reg src = this->result; 1288 1289 /* If we generated an expression instruction into a temporary in 1290 * processing the saturate's operand, apply the saturate to that 1291 * instruction. Otherwise, generate a MOV to do the saturate. 
1292 * 1293 * Note that we have to be careful to only do this optimization if 1294 * the instruction in question was what generated src->result. For 1295 * example, ir_dereference_array might generate a MUL instruction 1296 * to create the reladdr, and return us a src reg using that 1297 * reladdr. That MUL result is not the value we're trying to 1298 * saturate. 1299 */ 1300 ir_expression *sat_src_expr = sat_src->as_expression(); 1301 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || 1302 sat_src_expr->operation == ir_binop_add || 1303 sat_src_expr->operation == ir_binop_dot)) { 1304 glsl_to_tgsi_instruction *new_inst; 1305 new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 1306 new_inst->saturate = true; 1307 } else { 1308 this->result = get_temp(ir->type); 1309 st_dst_reg result_dst = st_dst_reg(this->result); 1310 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1311 glsl_to_tgsi_instruction *inst; 1312 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); 1313 inst->saturate = true; 1314 } 1315 1316 return true; 1317} 1318 1319void 1320glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1321 st_src_reg *reg, int *num_reladdr) 1322{ 1323 if (!reg->reladdr) 1324 return; 1325 1326 emit_arl(ir, address_reg, *reg->reladdr); 1327 1328 if (*num_reladdr != 1) { 1329 st_src_reg temp = get_temp(glsl_type::vec4_type); 1330 1331 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); 1332 *reg = temp; 1333 } 1334 1335 (*num_reladdr)--; 1336} 1337 1338void 1339glsl_to_tgsi_visitor::visit(ir_expression *ir) 1340{ 1341 unsigned int operand; 1342 st_src_reg op[Elements(ir->operands)]; 1343 st_src_reg result_src; 1344 st_dst_reg result_dst; 1345 1346 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) 1347 */ 1348 if (ir->operation == ir_binop_add) { 1349 if (try_emit_mad(ir, 1)) 1350 return; 1351 if (try_emit_mad(ir, 0)) 1352 return; 1353 } 1354 1355 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, 
NOT(b)) 1356 */ 1357 if (ir->operation == ir_binop_logic_and) { 1358 if (try_emit_mad_for_and_not(ir, 1)) 1359 return; 1360 if (try_emit_mad_for_and_not(ir, 0)) 1361 return; 1362 } 1363 1364 if (try_emit_sat(ir)) 1365 return; 1366 1367 if (ir->operation == ir_quadop_vector) 1368 assert(!"ir_quadop_vector should have been lowered"); 1369 1370 for (operand = 0; operand < ir->get_num_operands(); operand++) { 1371 this->result.file = PROGRAM_UNDEFINED; 1372 ir->operands[operand]->accept(this); 1373 if (this->result.file == PROGRAM_UNDEFINED) { 1374 ir_print_visitor v; 1375 printf("Failed to get tree for expression operand:\n"); 1376 ir->operands[operand]->accept(&v); 1377 exit(1); 1378 } 1379 op[operand] = this->result; 1380 1381 /* Matrix expression operands should have been broken down to vector 1382 * operations already. 1383 */ 1384 assert(!ir->operands[operand]->type->is_matrix()); 1385 } 1386 1387 int vector_elements = ir->operands[0]->type->vector_elements; 1388 if (ir->operands[1]) { 1389 vector_elements = MAX2(vector_elements, 1390 ir->operands[1]->type->vector_elements); 1391 } 1392 1393 this->result.file = PROGRAM_UNDEFINED; 1394 1395 /* Storage for our result. Ideally for an assignment we'd be using 1396 * the actual storage for the result here, instead. 1397 */ 1398 result_src = get_temp(ir->type); 1399 /* convenience for the emit functions below. */ 1400 result_dst = st_dst_reg(result_src); 1401 /* Limit writes to the channels that will be used by result_src later. 1402 * This does limit this temp's use as a temporary for multi-instruction 1403 * sequences. 1404 */ 1405 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1406 1407 switch (ir->operation) { 1408 case ir_unop_logic_not: 1409 if (result_dst.type != GLSL_TYPE_FLOAT) 1410 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1411 else { 1412 /* Previously 'SEQ dst, src, 0.0' was used for this. 
However, many 1413 * older GPUs implement SEQ using multiple instructions (i915 uses two 1414 * SGE instructions and a MUL instruction). Since our logic values are 1415 * 0.0 and 1.0, 1-x also implements !x. 1416 */ 1417 op[0].negate = ~op[0].negate; 1418 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); 1419 } 1420 break; 1421 case ir_unop_neg: 1422 assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); 1423 if (result_dst.type == GLSL_TYPE_INT) 1424 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1425 else { 1426 op[0].negate = ~op[0].negate; 1427 result_src = op[0]; 1428 } 1429 break; 1430 case ir_unop_abs: 1431 assert(result_dst.type == GLSL_TYPE_FLOAT); 1432 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); 1433 break; 1434 case ir_unop_sign: 1435 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); 1436 break; 1437 case ir_unop_rcp: 1438 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); 1439 break; 1440 1441 case ir_unop_exp2: 1442 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); 1443 break; 1444 case ir_unop_exp: 1445 case ir_unop_log: 1446 assert(!"not reached: should be handled by ir_explog_to_explog2"); 1447 break; 1448 case ir_unop_log2: 1449 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); 1450 break; 1451 case ir_unop_sin: 1452 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1453 break; 1454 case ir_unop_cos: 1455 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1456 break; 1457 case ir_unop_sin_reduced: 1458 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1459 break; 1460 case ir_unop_cos_reduced: 1461 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1462 break; 1463 1464 case ir_unop_dFdx: 1465 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); 1466 break; 1467 case ir_unop_dFdy: 1468 op[0].negate = ~op[0].negate; 1469 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); 1470 break; 1471 1472 case ir_unop_noise: { 1473 /* At some point, a motivated person could add a better 1474 * implementation of 
noise. Currently not even the nvidia 1475 * binary drivers do anything more than this. In any case, the 1476 * place to do this is in the GL state tracker, not the poor 1477 * driver. 1478 */ 1479 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); 1480 break; 1481 } 1482 1483 case ir_binop_add: 1484 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1485 break; 1486 case ir_binop_sub: 1487 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); 1488 break; 1489 1490 case ir_binop_mul: 1491 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1492 break; 1493 case ir_binop_div: 1494 if (result_dst.type == GLSL_TYPE_FLOAT) 1495 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1496 else 1497 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); 1498 break; 1499 case ir_binop_mod: 1500 if (result_dst.type == GLSL_TYPE_FLOAT) 1501 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1502 else 1503 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); 1504 break; 1505 1506 case ir_binop_less: 1507 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); 1508 break; 1509 case ir_binop_greater: 1510 emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); 1511 break; 1512 case ir_binop_lequal: 1513 emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); 1514 break; 1515 case ir_binop_gequal: 1516 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); 1517 break; 1518 case ir_binop_equal: 1519 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1520 break; 1521 case ir_binop_nequal: 1522 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1523 break; 1524 case ir_binop_all_equal: 1525 /* "==" operator producing a scalar boolean. */ 1526 if (ir->operands[0]->type->is_vector() || 1527 ir->operands[1]->type->is_vector()) { 1528 st_src_reg temp = get_temp(native_integers ? 
1529 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1530 glsl_type::vec4_type); 1531 1532 if (native_integers) { 1533 st_dst_reg temp_dst = st_dst_reg(temp); 1534 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1535 1536 emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); 1537 1538 /* Emit 1-3 AND operations to combine the SEQ results. */ 1539 switch (ir->operands[0]->type->vector_elements) { 1540 case 2: 1541 break; 1542 case 3: 1543 temp_dst.writemask = WRITEMASK_Y; 1544 temp1.swizzle = SWIZZLE_YYYY; 1545 temp2.swizzle = SWIZZLE_ZZZZ; 1546 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1547 break; 1548 case 4: 1549 temp_dst.writemask = WRITEMASK_X; 1550 temp1.swizzle = SWIZZLE_XXXX; 1551 temp2.swizzle = SWIZZLE_YYYY; 1552 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1553 temp_dst.writemask = WRITEMASK_Y; 1554 temp1.swizzle = SWIZZLE_ZZZZ; 1555 temp2.swizzle = SWIZZLE_WWWW; 1556 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1557 } 1558 1559 temp1.swizzle = SWIZZLE_XXXX; 1560 temp2.swizzle = SWIZZLE_YYYY; 1561 emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); 1562 } else { 1563 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1564 1565 /* After the dot-product, the value will be an integer on the 1566 * range [0,4]. Zero becomes 1.0, and positive values become zero. 1567 */ 1568 emit_dp(ir, result_dst, temp, temp, vector_elements); 1569 1570 /* Negating the result of the dot-product gives values on the range 1571 * [-4, 0]. Zero becomes 1.0, and negative values become zero. 1572 * This is achieved using SGE. 1573 */ 1574 st_src_reg sge_src = result_src; 1575 sge_src.negate = ~sge_src.negate; 1576 emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); 1577 } 1578 } else { 1579 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1580 } 1581 break; 1582 case ir_binop_any_nequal: 1583 /* "!=" operator producing a scalar boolean. 
*/ 1584 if (ir->operands[0]->type->is_vector() || 1585 ir->operands[1]->type->is_vector()) { 1586 st_src_reg temp = get_temp(native_integers ? 1587 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1588 glsl_type::vec4_type); 1589 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1590 1591 if (native_integers) { 1592 st_dst_reg temp_dst = st_dst_reg(temp); 1593 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1594 1595 /* Emit 1-3 OR operations to combine the SNE results. */ 1596 switch (ir->operands[0]->type->vector_elements) { 1597 case 2: 1598 break; 1599 case 3: 1600 temp_dst.writemask = WRITEMASK_Y; 1601 temp1.swizzle = SWIZZLE_YYYY; 1602 temp2.swizzle = SWIZZLE_ZZZZ; 1603 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1604 break; 1605 case 4: 1606 temp_dst.writemask = WRITEMASK_X; 1607 temp1.swizzle = SWIZZLE_XXXX; 1608 temp2.swizzle = SWIZZLE_YYYY; 1609 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1610 temp_dst.writemask = WRITEMASK_Y; 1611 temp1.swizzle = SWIZZLE_ZZZZ; 1612 temp2.swizzle = SWIZZLE_WWWW; 1613 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1614 } 1615 1616 temp1.swizzle = SWIZZLE_XXXX; 1617 temp2.swizzle = SWIZZLE_YYYY; 1618 emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); 1619 } else { 1620 /* After the dot-product, the value will be an integer on the 1621 * range [0,4]. Zero stays zero, and positive values become 1.0. 1622 */ 1623 glsl_to_tgsi_instruction *const dp = 1624 emit_dp(ir, result_dst, temp, temp, vector_elements); 1625 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1626 /* The clamping to [0,1] can be done for free in the fragment 1627 * shader with a saturate. 1628 */ 1629 dp->saturate = true; 1630 } else { 1631 /* Negating the result of the dot-product gives values on the range 1632 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1633 * achieved using SLT. 
1634 */ 1635 st_src_reg slt_src = result_src; 1636 slt_src.negate = ~slt_src.negate; 1637 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1638 } 1639 } 1640 } else { 1641 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1642 } 1643 break; 1644 1645 case ir_unop_any: { 1646 assert(ir->operands[0]->type->is_vector()); 1647 1648 /* After the dot-product, the value will be an integer on the 1649 * range [0,4]. Zero stays zero, and positive values become 1.0. 1650 */ 1651 glsl_to_tgsi_instruction *const dp = 1652 emit_dp(ir, result_dst, op[0], op[0], 1653 ir->operands[0]->type->vector_elements); 1654 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 1655 result_dst.type == GLSL_TYPE_FLOAT) { 1656 /* The clamping to [0,1] can be done for free in the fragment 1657 * shader with a saturate. 1658 */ 1659 dp->saturate = true; 1660 } else if (result_dst.type == GLSL_TYPE_FLOAT) { 1661 /* Negating the result of the dot-product gives values on the range 1662 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1663 * is achieved using SLT. 1664 */ 1665 st_src_reg slt_src = result_src; 1666 slt_src.negate = ~slt_src.negate; 1667 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1668 } 1669 else { 1670 /* Use SNE 0 if integers are being used as boolean values. */ 1671 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); 1672 } 1673 break; 1674 } 1675 1676 case ir_binop_logic_xor: 1677 if (native_integers) 1678 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1679 else 1680 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1681 break; 1682 1683 case ir_binop_logic_or: { 1684 if (native_integers) { 1685 /* If integers are used as booleans, we can use an actual "or" 1686 * instruction. 1687 */ 1688 assert(native_integers); 1689 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1690 } else { 1691 /* After the addition, the value will be an integer on the 1692 * range [0,2]. 
Zero stays zero, and positive values become 1.0. 1693 */ 1694 glsl_to_tgsi_instruction *add = 1695 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1696 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1697 /* The clamping to [0,1] can be done for free in the fragment 1698 * shader with a saturate if floats are being used as boolean values. 1699 */ 1700 add->saturate = true; 1701 } else { 1702 /* Negating the result of the addition gives values on the range 1703 * [-2, 0]. Zero stays zero, and negative values become 1.0. This 1704 * is achieved using SLT. 1705 */ 1706 st_src_reg slt_src = result_src; 1707 slt_src.negate = ~slt_src.negate; 1708 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1709 } 1710 } 1711 break; 1712 } 1713 1714 case ir_binop_logic_and: 1715 /* If native integers are disabled, the bool args are stored as float 0.0 1716 * or 1.0, so "mul" gives us "and". If they're enabled, just use the 1717 * actual AND opcode. 1718 */ 1719 if (native_integers) 1720 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1721 else 1722 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1723 break; 1724 1725 case ir_binop_dot: 1726 assert(ir->operands[0]->type->is_vector()); 1727 assert(ir->operands[0]->type == ir->operands[1]->type); 1728 emit_dp(ir, result_dst, op[0], op[1], 1729 ir->operands[0]->type->vector_elements); 1730 break; 1731 1732 case ir_unop_sqrt: 1733 /* sqrt(x) = x * rsq(x). */ 1734 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1735 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); 1736 /* For incoming channels <= 0, set the result to 0. 
*/ 1737 op[0].negate = ~op[0].negate; 1738 emit(ir, TGSI_OPCODE_CMP, result_dst, 1739 op[0], result_src, st_src_reg_for_float(0.0)); 1740 break; 1741 case ir_unop_rsq: 1742 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1743 break; 1744 case ir_unop_i2f: 1745 if (native_integers) { 1746 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1747 break; 1748 } 1749 /* fallthrough to next case otherwise */ 1750 case ir_unop_b2f: 1751 if (native_integers) { 1752 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); 1753 break; 1754 } 1755 /* fallthrough to next case otherwise */ 1756 case ir_unop_i2u: 1757 case ir_unop_u2i: 1758 /* Converting between signed and unsigned integers is a no-op. */ 1759 result_src = op[0]; 1760 break; 1761 case ir_unop_b2i: 1762 if (native_integers) { 1763 /* Booleans are stored as integers using ~0 for true and 0 for false. 1764 * GLSL requires that int(bool) return 1 for true and 0 for false. 1765 * This conversion is done with AND, but it could be done with NEG. 1766 */ 1767 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); 1768 } else { 1769 /* Booleans and integers are both stored as floats when native 1770 * integers are disabled. 
1771 */ 1772 result_src = op[0]; 1773 } 1774 break; 1775 case ir_unop_f2i: 1776 if (native_integers) 1777 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1778 else 1779 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1780 break; 1781 case ir_unop_f2b: 1782 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1783 break; 1784 case ir_unop_i2b: 1785 if (native_integers) 1786 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1787 else 1788 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1789 break; 1790 case ir_unop_trunc: 1791 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1792 break; 1793 case ir_unop_ceil: 1794 op[0].negate = ~op[0].negate; 1795 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1796 result_src.negate = ~result_src.negate; 1797 break; 1798 case ir_unop_floor: 1799 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1800 break; 1801 case ir_unop_fract: 1802 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 1803 break; 1804 1805 case ir_binop_min: 1806 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 1807 break; 1808 case ir_binop_max: 1809 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 1810 break; 1811 case ir_binop_pow: 1812 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 1813 break; 1814 1815 case ir_unop_bit_not: 1816 if (native_integers) { 1817 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1818 break; 1819 } 1820 case ir_unop_u2f: 1821 if (native_integers) { 1822 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 1823 break; 1824 } 1825 case ir_binop_lshift: 1826 if (native_integers) { 1827 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); 1828 break; 1829 } 1830 case ir_binop_rshift: 1831 if (native_integers) { 1832 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); 1833 break; 1834 } 1835 case ir_binop_bit_and: 1836 if (native_integers) { 1837 emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); 1838 break; 1839 } 1840 case ir_binop_bit_xor: 1841 if (native_integers) { 1842 emit(ir, TGSI_OPCODE_XOR, result_dst, 
op[0]); 1843 break; 1844 } 1845 case ir_binop_bit_or: 1846 if (native_integers) { 1847 emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); 1848 break; 1849 } 1850 case ir_unop_round_even: 1851 assert(!"GLSL 1.30 features unsupported"); 1852 break; 1853 1854 case ir_quadop_vector: 1855 /* This operation should have already been handled. 1856 */ 1857 assert(!"Should not get here."); 1858 break; 1859 } 1860 1861 this->result = result_src; 1862} 1863 1864 1865void 1866glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 1867{ 1868 st_src_reg src; 1869 int i; 1870 int swizzle[4]; 1871 1872 /* Note that this is only swizzles in expressions, not those on the left 1873 * hand side of an assignment, which do write masking. See ir_assignment 1874 * for that. 1875 */ 1876 1877 ir->val->accept(this); 1878 src = this->result; 1879 assert(src.file != PROGRAM_UNDEFINED); 1880 1881 for (i = 0; i < 4; i++) { 1882 if (i < ir->type->vector_elements) { 1883 switch (i) { 1884 case 0: 1885 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1886 break; 1887 case 1: 1888 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1889 break; 1890 case 2: 1891 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1892 break; 1893 case 3: 1894 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1895 break; 1896 } 1897 } else { 1898 /* If the type is smaller than a vec4, replicate the last 1899 * channel out. 
1900 */ 1901 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1902 } 1903 } 1904 1905 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1906 1907 this->result = src; 1908} 1909 1910void 1911glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 1912{ 1913 variable_storage *entry = find_variable_storage(ir->var); 1914 ir_variable *var = ir->var; 1915 1916 if (!entry) { 1917 switch (var->mode) { 1918 case ir_var_uniform: 1919 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1920 var->location); 1921 this->variables.push_tail(entry); 1922 break; 1923 case ir_var_in: 1924 case ir_var_inout: 1925 /* The linker assigns locations for varyings and attributes, 1926 * including deprecated builtins (like gl_Color), user-assign 1927 * generic attributes (glBindVertexLocation), and 1928 * user-defined varyings. 1929 * 1930 * FINISHME: We would hit this path for function arguments. Fix! 1931 */ 1932 assert(var->location != -1); 1933 entry = new(mem_ctx) variable_storage(var, 1934 PROGRAM_INPUT, 1935 var->location); 1936 break; 1937 case ir_var_out: 1938 assert(var->location != -1); 1939 entry = new(mem_ctx) variable_storage(var, 1940 PROGRAM_OUTPUT, 1941 var->location); 1942 break; 1943 case ir_var_system_value: 1944 entry = new(mem_ctx) variable_storage(var, 1945 PROGRAM_SYSTEM_VALUE, 1946 var->location); 1947 break; 1948 case ir_var_auto: 1949 case ir_var_temporary: 1950 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1951 this->next_temp); 1952 this->variables.push_tail(entry); 1953 1954 next_temp += type_size(var->type); 1955 break; 1956 } 1957 1958 if (!entry) { 1959 printf("Failed to make storage for %s\n", var->name); 1960 exit(1); 1961 } 1962 } 1963 1964 this->result = st_src_reg(entry->file, entry->index, var->type); 1965 if (!native_integers) 1966 this->result.type = GLSL_TYPE_FLOAT; 1967} 1968 1969void 1970glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 1971{ 1972 ir_constant *index; 1973 st_src_reg src; 
1974 int element_size = type_size(ir->type); 1975 1976 index = ir->array_index->constant_expression_value(); 1977 1978 ir->array->accept(this); 1979 src = this->result; 1980 1981 if (index) { 1982 src.index += index->value.i[0] * element_size; 1983 } else { 1984 /* Variable index array dereference. It eats the "vec4" of the 1985 * base of the array and an index that offsets the TGSI register 1986 * index. 1987 */ 1988 ir->array_index->accept(this); 1989 1990 st_src_reg index_reg; 1991 1992 if (element_size == 1) { 1993 index_reg = this->result; 1994 } else { 1995 index_reg = get_temp(native_integers ? 1996 glsl_type::int_type : glsl_type::float_type); 1997 1998 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 1999 this->result, st_src_reg_for_type(index_reg.type, element_size)); 2000 } 2001 2002 /* If there was already a relative address register involved, add the 2003 * new and the old together to get the new offset. 2004 */ 2005 if (src.reladdr != NULL) { 2006 st_src_reg accum_reg = get_temp(native_integers ? 2007 glsl_type::int_type : glsl_type::float_type); 2008 2009 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), 2010 index_reg, *src.reladdr); 2011 2012 index_reg = accum_reg; 2013 } 2014 2015 src.reladdr = ralloc(mem_ctx, st_src_reg); 2016 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 2017 } 2018 2019 /* If the type is smaller than a vec4, replicate the last channel out. 
*/ 2020 if (ir->type->is_scalar() || ir->type->is_vector()) 2021 src.swizzle = swizzle_for_size(ir->type->vector_elements); 2022 else 2023 src.swizzle = SWIZZLE_NOOP; 2024 2025 this->result = src; 2026} 2027 2028void 2029glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 2030{ 2031 unsigned int i; 2032 const glsl_type *struct_type = ir->record->type; 2033 int offset = 0; 2034 2035 ir->record->accept(this); 2036 2037 for (i = 0; i < struct_type->length; i++) { 2038 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 2039 break; 2040 offset += type_size(struct_type->fields.structure[i].type); 2041 } 2042 2043 /* If the type is smaller than a vec4, replicate the last channel out. */ 2044 if (ir->type->is_scalar() || ir->type->is_vector()) 2045 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 2046 else 2047 this->result.swizzle = SWIZZLE_NOOP; 2048 2049 this->result.index += offset; 2050} 2051 2052/** 2053 * We want to be careful in assignment setup to hit the actual storage 2054 * instead of potentially using a temporary like we might with the 2055 * ir_dereference handler. 2056 */ 2057static st_dst_reg 2058get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) 2059{ 2060 /* The LHS must be a dereference. If the LHS is a variable indexed array 2061 * access of a vector, it must be separated into a series conditional moves 2062 * before reaching this point (see ir_vec_index_to_cond_assign). 2063 */ 2064 assert(ir->as_dereference()); 2065 ir_dereference_array *deref_array = ir->as_dereference_array(); 2066 if (deref_array) { 2067 assert(!deref_array->array->type->is_vector()); 2068 } 2069 2070 /* Use the rvalue deref handler for the most part. We'll ignore 2071 * swizzles in it and write swizzles using writemask, though. 
2072 */ 2073 ir->accept(v); 2074 return st_dst_reg(v->result); 2075} 2076 2077/** 2078 * Process the condition of a conditional assignment 2079 * 2080 * Examines the condition of a conditional assignment to generate the optimal 2081 * first operand of a \c CMP instruction. If the condition is a relational 2082 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 2083 * used as the source for the \c CMP instruction. Otherwise the comparison 2084 * is processed to a boolean result, and the boolean result is used as the 2085 * operand to the CMP instruction. 2086 */ 2087bool 2088glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 2089{ 2090 ir_rvalue *src_ir = ir; 2091 bool negate = true; 2092 bool switch_order = false; 2093 2094 ir_expression *const expr = ir->as_expression(); 2095 if ((expr != NULL) && (expr->get_num_operands() == 2)) { 2096 bool zero_on_left = false; 2097 2098 if (expr->operands[0]->is_zero()) { 2099 src_ir = expr->operands[1]; 2100 zero_on_left = true; 2101 } else if (expr->operands[1]->is_zero()) { 2102 src_ir = expr->operands[0]; 2103 zero_on_left = false; 2104 } 2105 2106 /* a is - 0 + - 0 + 2107 * (a < 0) T F F ( a < 0) T F F 2108 * (0 < a) F F T (-a < 0) F F T 2109 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) 2110 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) 2111 * (a > 0) F F T (-a < 0) F F T 2112 * (0 > a) T F F ( a < 0) T F F 2113 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 2114 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 2115 * 2116 * Note that exchanging the order of 0 and 'a' in the comparison simply 2117 * means that the value of 'a' should be negated. 
2118 */ 2119 if (src_ir != ir) { 2120 switch (expr->operation) { 2121 case ir_binop_less: 2122 switch_order = false; 2123 negate = zero_on_left; 2124 break; 2125 2126 case ir_binop_greater: 2127 switch_order = false; 2128 negate = !zero_on_left; 2129 break; 2130 2131 case ir_binop_lequal: 2132 switch_order = true; 2133 negate = !zero_on_left; 2134 break; 2135 2136 case ir_binop_gequal: 2137 switch_order = true; 2138 negate = zero_on_left; 2139 break; 2140 2141 default: 2142 /* This isn't the right kind of comparison afterall, so make sure 2143 * the whole condition is visited. 2144 */ 2145 src_ir = ir; 2146 break; 2147 } 2148 } 2149 } 2150 2151 src_ir->accept(this); 2152 2153 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 2154 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 2155 * choose which value TGSI_OPCODE_CMP produces without an extra instruction 2156 * computing the condition. 2157 */ 2158 if (negate) 2159 this->result.negate = ~this->result.negate; 2160 2161 return switch_order; 2162} 2163 2164void 2165glsl_to_tgsi_visitor::visit(ir_assignment *ir) 2166{ 2167 st_dst_reg l; 2168 st_src_reg r; 2169 int i; 2170 2171 ir->rhs->accept(this); 2172 r = this->result; 2173 2174 l = get_assignment_lhs(ir->lhs, this); 2175 2176 /* FINISHME: This should really set to the correct maximal writemask for each 2177 * FINISHME: component written (in the loops below). This case can only 2178 * FINISHME: occur for matrices, arrays, and structures. 2179 */ 2180 if (ir->write_mask == 0) { 2181 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 2182 l.writemask = WRITEMASK_XYZW; 2183 } else if (ir->lhs->type->is_scalar() && 2184 ir->lhs->variable_referenced()->mode == ir_var_out) { 2185 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 2186 * FINISHME: W component of fragment shader output zero, work correctly. 
2187 */ 2188 l.writemask = WRITEMASK_XYZW; 2189 } else { 2190 int swizzles[4]; 2191 int first_enabled_chan = 0; 2192 int rhs_chan = 0; 2193 2194 l.writemask = ir->write_mask; 2195 2196 for (int i = 0; i < 4; i++) { 2197 if (l.writemask & (1 << i)) { 2198 first_enabled_chan = GET_SWZ(r.swizzle, i); 2199 break; 2200 } 2201 } 2202 2203 /* Swizzle a small RHS vector into the channels being written. 2204 * 2205 * glsl ir treats write_mask as dictating how many channels are 2206 * present on the RHS while TGSI treats write_mask as just 2207 * showing which channels of the vec4 RHS get written. 2208 */ 2209 for (int i = 0; i < 4; i++) { 2210 if (l.writemask & (1 << i)) 2211 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 2212 else 2213 swizzles[i] = first_enabled_chan; 2214 } 2215 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 2216 swizzles[2], swizzles[3]); 2217 } 2218 2219 assert(l.file != PROGRAM_UNDEFINED); 2220 assert(r.file != PROGRAM_UNDEFINED); 2221 2222 if (ir->condition) { 2223 const bool switch_order = this->process_move_condition(ir->condition); 2224 st_src_reg condition = this->result; 2225 2226 for (i = 0; i < type_size(ir->lhs->type); i++) { 2227 st_src_reg l_src = st_src_reg(l); 2228 st_src_reg condition_temp = condition; 2229 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); 2230 2231 if (native_integers) { 2232 /* This is necessary because TGSI's CMP instruction expects the 2233 * condition to be a float, and we store booleans as integers. 2234 * If TGSI had a UCMP instruction or similar, this extra 2235 * instruction would not be necessary. 
2236 */ 2237 condition_temp = get_temp(glsl_type::vec4_type); 2238 condition.negate = 0; 2239 emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition); 2240 condition_temp.swizzle = condition.swizzle; 2241 } 2242 2243 if (switch_order) { 2244 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r); 2245 } else { 2246 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src); 2247 } 2248 2249 l.index++; 2250 r.index++; 2251 } 2252 } else if (ir->rhs->as_expression() && 2253 this->instructions.get_tail() && 2254 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 2255 type_size(ir->lhs->type) == 1 && 2256 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { 2257 /* To avoid emitting an extra MOV when assigning an expression to a 2258 * variable, emit the last instruction of the expression again, but 2259 * replace the destination register with the target of the assignment. 2260 * Dead code elimination will remove the original instruction. 2261 */ 2262 glsl_to_tgsi_instruction *inst, *new_inst; 2263 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2264 new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); 2265 new_inst->saturate = inst->saturate; 2266 inst->dead_mask = inst->dst.writemask; 2267 } else { 2268 for (i = 0; i < type_size(ir->lhs->type); i++) { 2269 emit(ir, TGSI_OPCODE_MOV, l, r); 2270 l.index++; 2271 r.index++; 2272 } 2273 } 2274} 2275 2276 2277void 2278glsl_to_tgsi_visitor::visit(ir_constant *ir) 2279{ 2280 st_src_reg src; 2281 GLfloat stack_vals[4] = { 0 }; 2282 gl_constant_value *values = (gl_constant_value *) stack_vals; 2283 GLenum gl_type = GL_NONE; 2284 unsigned int i; 2285 static int in_array = 0; 2286 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 2287 2288 /* Unfortunately, 4 floats is all we can get into 2289 * _mesa_add_typed_unnamed_constant. 
So, make a temp to store an 2290 * aggregate constant and move each constant value into it. If we 2291 * get lucky, copy propagation will eliminate the extra moves. 2292 */ 2293 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 2294 st_src_reg temp_base = get_temp(ir->type); 2295 st_dst_reg temp = st_dst_reg(temp_base); 2296 2297 foreach_iter(exec_list_iterator, iter, ir->components) { 2298 ir_constant *field_value = (ir_constant *)iter.get(); 2299 int size = type_size(field_value->type); 2300 2301 assert(size > 0); 2302 2303 field_value->accept(this); 2304 src = this->result; 2305 2306 for (i = 0; i < (unsigned int)size; i++) { 2307 emit(ir, TGSI_OPCODE_MOV, temp, src); 2308 2309 src.index++; 2310 temp.index++; 2311 } 2312 } 2313 this->result = temp_base; 2314 return; 2315 } 2316 2317 if (ir->type->is_array()) { 2318 st_src_reg temp_base = get_temp(ir->type); 2319 st_dst_reg temp = st_dst_reg(temp_base); 2320 int size = type_size(ir->type->fields.array); 2321 2322 assert(size > 0); 2323 in_array++; 2324 2325 for (i = 0; i < ir->type->length; i++) { 2326 ir->array_elements[i]->accept(this); 2327 src = this->result; 2328 for (int j = 0; j < size; j++) { 2329 emit(ir, TGSI_OPCODE_MOV, temp, src); 2330 2331 src.index++; 2332 temp.index++; 2333 } 2334 } 2335 this->result = temp_base; 2336 in_array--; 2337 return; 2338 } 2339 2340 if (ir->type->is_matrix()) { 2341 st_src_reg mat = get_temp(ir->type); 2342 st_dst_reg mat_column = st_dst_reg(mat); 2343 2344 for (i = 0; i < ir->type->matrix_columns; i++) { 2345 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 2346 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 2347 2348 src = st_src_reg(file, -1, ir->type->base_type); 2349 src.index = add_constant(file, 2350 values, 2351 ir->type->vector_elements, 2352 GL_FLOAT, 2353 &src.swizzle); 2354 emit(ir, TGSI_OPCODE_MOV, mat_column, src); 2355 2356 mat_column.index++; 2357 } 2358 2359 this->result = mat; 2360 return; 2361 } 2362 2363 switch 
(ir->type->base_type) { 2364 case GLSL_TYPE_FLOAT: 2365 gl_type = GL_FLOAT; 2366 for (i = 0; i < ir->type->vector_elements; i++) { 2367 values[i].f = ir->value.f[i]; 2368 } 2369 break; 2370 case GLSL_TYPE_UINT: 2371 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; 2372 for (i = 0; i < ir->type->vector_elements; i++) { 2373 if (native_integers) 2374 values[i].u = ir->value.u[i]; 2375 else 2376 values[i].f = ir->value.u[i]; 2377 } 2378 break; 2379 case GLSL_TYPE_INT: 2380 gl_type = native_integers ? GL_INT : GL_FLOAT; 2381 for (i = 0; i < ir->type->vector_elements; i++) { 2382 if (native_integers) 2383 values[i].i = ir->value.i[i]; 2384 else 2385 values[i].f = ir->value.i[i]; 2386 } 2387 break; 2388 case GLSL_TYPE_BOOL: 2389 gl_type = native_integers ? GL_BOOL : GL_FLOAT; 2390 for (i = 0; i < ir->type->vector_elements; i++) { 2391 if (native_integers) 2392 values[i].b = ir->value.b[i]; 2393 else 2394 values[i].f = ir->value.b[i]; 2395 } 2396 break; 2397 default: 2398 assert(!"Non-float/uint/int/bool constant"); 2399 } 2400 2401 this->result = st_src_reg(file, -1, ir->type); 2402 this->result.index = add_constant(file, 2403 values, 2404 ir->type->vector_elements, 2405 gl_type, 2406 &this->result.swizzle); 2407} 2408 2409function_entry * 2410glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) 2411{ 2412 function_entry *entry; 2413 2414 foreach_iter(exec_list_iterator, iter, this->function_signatures) { 2415 entry = (function_entry *)iter.get(); 2416 2417 if (entry->sig == sig) 2418 return entry; 2419 } 2420 2421 entry = ralloc(mem_ctx, function_entry); 2422 entry->sig = sig; 2423 entry->sig_id = this->next_signature_id++; 2424 entry->bgn_inst = NULL; 2425 2426 /* Allocate storage for all the parameters. 
*/ 2427 foreach_iter(exec_list_iterator, iter, sig->parameters) { 2428 ir_variable *param = (ir_variable *)iter.get(); 2429 variable_storage *storage; 2430 2431 storage = find_variable_storage(param); 2432 assert(!storage); 2433 2434 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, 2435 this->next_temp); 2436 this->variables.push_tail(storage); 2437 2438 this->next_temp += type_size(param->type); 2439 } 2440 2441 if (!sig->return_type->is_void()) { 2442 entry->return_reg = get_temp(sig->return_type); 2443 } else { 2444 entry->return_reg = undef_src; 2445 } 2446 2447 this->function_signatures.push_tail(entry); 2448 return entry; 2449} 2450 2451void 2452glsl_to_tgsi_visitor::visit(ir_call *ir) 2453{ 2454 glsl_to_tgsi_instruction *call_inst; 2455 ir_function_signature *sig = ir->get_callee(); 2456 function_entry *entry = get_function_signature(sig); 2457 int i; 2458 2459 /* Process in parameters. */ 2460 exec_list_iterator sig_iter = sig->parameters.iterator(); 2461 foreach_iter(exec_list_iterator, iter, *ir) { 2462 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2463 ir_variable *param = (ir_variable *)sig_iter.get(); 2464 2465 if (param->mode == ir_var_in || 2466 param->mode == ir_var_inout) { 2467 variable_storage *storage = find_variable_storage(param); 2468 assert(storage); 2469 2470 param_rval->accept(this); 2471 st_src_reg r = this->result; 2472 2473 st_dst_reg l; 2474 l.file = storage->file; 2475 l.index = storage->index; 2476 l.reladdr = NULL; 2477 l.writemask = WRITEMASK_XYZW; 2478 l.cond_mask = COND_TR; 2479 2480 for (i = 0; i < type_size(param->type); i++) { 2481 emit(ir, TGSI_OPCODE_MOV, l, r); 2482 l.index++; 2483 r.index++; 2484 } 2485 } 2486 2487 sig_iter.next(); 2488 } 2489 assert(!sig_iter.has_next()); 2490 2491 /* Emit call instruction */ 2492 call_inst = emit(ir, TGSI_OPCODE_CAL); 2493 call_inst->function = entry; 2494 2495 /* Process out parameters. 
*/ 2496 sig_iter = sig->parameters.iterator(); 2497 foreach_iter(exec_list_iterator, iter, *ir) { 2498 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2499 ir_variable *param = (ir_variable *)sig_iter.get(); 2500 2501 if (param->mode == ir_var_out || 2502 param->mode == ir_var_inout) { 2503 variable_storage *storage = find_variable_storage(param); 2504 assert(storage); 2505 2506 st_src_reg r; 2507 r.file = storage->file; 2508 r.index = storage->index; 2509 r.reladdr = NULL; 2510 r.swizzle = SWIZZLE_NOOP; 2511 r.negate = 0; 2512 2513 param_rval->accept(this); 2514 st_dst_reg l = st_dst_reg(this->result); 2515 2516 for (i = 0; i < type_size(param->type); i++) { 2517 emit(ir, TGSI_OPCODE_MOV, l, r); 2518 l.index++; 2519 r.index++; 2520 } 2521 } 2522 2523 sig_iter.next(); 2524 } 2525 assert(!sig_iter.has_next()); 2526 2527 /* Process return value. */ 2528 this->result = entry->return_reg; 2529} 2530 2531void 2532glsl_to_tgsi_visitor::visit(ir_texture *ir) 2533{ 2534 st_src_reg result_src, coord, lod_info, projector, dx, dy, offset; 2535 st_dst_reg result_dst, coord_dst; 2536 glsl_to_tgsi_instruction *inst = NULL; 2537 unsigned opcode = TGSI_OPCODE_NOP; 2538 2539 if (ir->coordinate) { 2540 ir->coordinate->accept(this); 2541 2542 /* Put our coords in a temp. We'll need to modify them for shadow, 2543 * projection, or LOD, so the only case we'd use it as is is if 2544 * we're doing plain old texturing. The optimization passes on 2545 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 2546 */ 2547 coord = get_temp(glsl_type::vec4_type); 2548 coord_dst = st_dst_reg(coord); 2549 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2550 } 2551 2552 if (ir->projector) { 2553 ir->projector->accept(this); 2554 projector = this->result; 2555 } 2556 2557 /* Storage for our result. Ideally for an assignment we'd be using 2558 * the actual storage for the result here, instead. 
2559 */ 2560 result_src = get_temp(glsl_type::vec4_type); 2561 result_dst = st_dst_reg(result_src); 2562 2563 switch (ir->op) { 2564 case ir_tex: 2565 opcode = TGSI_OPCODE_TEX; 2566 break; 2567 case ir_txb: 2568 opcode = TGSI_OPCODE_TXB; 2569 ir->lod_info.bias->accept(this); 2570 lod_info = this->result; 2571 break; 2572 case ir_txl: 2573 opcode = TGSI_OPCODE_TXL; 2574 ir->lod_info.lod->accept(this); 2575 lod_info = this->result; 2576 break; 2577 case ir_txd: 2578 opcode = TGSI_OPCODE_TXD; 2579 ir->lod_info.grad.dPdx->accept(this); 2580 dx = this->result; 2581 ir->lod_info.grad.dPdy->accept(this); 2582 dy = this->result; 2583 break; 2584 case ir_txs: 2585 opcode = TGSI_OPCODE_TXQ; 2586 ir->lod_info.lod->accept(this); 2587 lod_info = this->result; 2588 break; 2589 case ir_txf: 2590 opcode = TGSI_OPCODE_TXF; 2591 ir->lod_info.lod->accept(this); 2592 lod_info = this->result; 2593 if (ir->offset) { 2594 ir->offset->accept(this); 2595 offset = this->result; 2596 } 2597 break; 2598 } 2599 2600 const glsl_type *sampler_type = ir->sampler->type; 2601 2602 if (ir->projector) { 2603 if (opcode == TGSI_OPCODE_TEX) { 2604 /* Slot the projector in as the last component of the coord. */ 2605 coord_dst.writemask = WRITEMASK_W; 2606 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); 2607 coord_dst.writemask = WRITEMASK_XYZW; 2608 opcode = TGSI_OPCODE_TXP; 2609 } else { 2610 st_src_reg coord_w = coord; 2611 coord_w.swizzle = SWIZZLE_WWWW; 2612 2613 /* For the other TEX opcodes there's no projective version 2614 * since the last slot is taken up by LOD info. Do the 2615 * projective divide now. 2616 */ 2617 coord_dst.writemask = WRITEMASK_W; 2618 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); 2619 2620 /* In the case where we have to project the coordinates "by hand," 2621 * the shadow comparator value must also be projected. 
2622 */ 2623 st_src_reg tmp_src = coord; 2624 if (ir->shadow_comparitor) { 2625 /* Slot the shadow value in as the second to last component of the 2626 * coord. 2627 */ 2628 ir->shadow_comparitor->accept(this); 2629 2630 tmp_src = get_temp(glsl_type::vec4_type); 2631 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 2632 2633 /* Projective division not allowed for array samplers. */ 2634 assert(!sampler_type->sampler_array); 2635 2636 tmp_dst.writemask = WRITEMASK_Z; 2637 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 2638 2639 tmp_dst.writemask = WRITEMASK_XY; 2640 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 2641 } 2642 2643 coord_dst.writemask = WRITEMASK_XYZ; 2644 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 2645 2646 coord_dst.writemask = WRITEMASK_XYZW; 2647 coord.swizzle = SWIZZLE_XYZW; 2648 } 2649 } 2650 2651 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 2652 * comparator was put in the correct place (and projected) by the code, 2653 * above, that handles by-hand projection. 2654 */ 2655 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 2656 /* Slot the shadow value in as the second to last component of the 2657 * coord. 2658 */ 2659 ir->shadow_comparitor->accept(this); 2660 2661 /* XXX This will need to be updated for cubemap array samplers. */ 2662 if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2663 sampler_type->sampler_array) { 2664 coord_dst.writemask = WRITEMASK_W; 2665 } else { 2666 coord_dst.writemask = WRITEMASK_Z; 2667 } 2668 2669 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2670 coord_dst.writemask = WRITEMASK_XYZW; 2671 } 2672 2673 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || 2674 opcode == TGSI_OPCODE_TXF) { 2675 /* TGSI stores LOD or LOD bias in the last channel of the coords. 
*/ 2676 coord_dst.writemask = WRITEMASK_W; 2677 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 2678 coord_dst.writemask = WRITEMASK_XYZW; 2679 } 2680 2681 if (opcode == TGSI_OPCODE_TXD) 2682 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2683 else if (opcode == TGSI_OPCODE_TXQ) 2684 inst = emit(ir, opcode, result_dst, lod_info); 2685 else if (opcode == TGSI_OPCODE_TXF) { 2686 inst = emit(ir, opcode, result_dst, coord); 2687 } else 2688 inst = emit(ir, opcode, result_dst, coord); 2689 2690 if (ir->shadow_comparitor) 2691 inst->tex_shadow = GL_TRUE; 2692 2693 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2694 this->shader_program, 2695 this->prog); 2696 2697 if (ir->offset) { 2698 inst->tex_offset_num_offset = 1; 2699 inst->tex_offsets[0].Index = offset.index; 2700 inst->tex_offsets[0].File = offset.file; 2701 inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0); 2702 inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1); 2703 inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2); 2704 } 2705 2706 switch (sampler_type->sampler_dimensionality) { 2707 case GLSL_SAMPLER_DIM_1D: 2708 inst->tex_target = (sampler_type->sampler_array) 2709 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2710 break; 2711 case GLSL_SAMPLER_DIM_2D: 2712 inst->tex_target = (sampler_type->sampler_array) 2713 ? 
TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2714 break; 2715 case GLSL_SAMPLER_DIM_3D: 2716 inst->tex_target = TEXTURE_3D_INDEX; 2717 break; 2718 case GLSL_SAMPLER_DIM_CUBE: 2719 inst->tex_target = TEXTURE_CUBE_INDEX; 2720 break; 2721 case GLSL_SAMPLER_DIM_RECT: 2722 inst->tex_target = TEXTURE_RECT_INDEX; 2723 break; 2724 case GLSL_SAMPLER_DIM_BUF: 2725 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2726 break; 2727 default: 2728 assert(!"Should not get here."); 2729 } 2730 2731 this->result = result_src; 2732} 2733 2734void 2735glsl_to_tgsi_visitor::visit(ir_return *ir) 2736{ 2737 if (ir->get_value()) { 2738 st_dst_reg l; 2739 int i; 2740 2741 assert(current_function); 2742 2743 ir->get_value()->accept(this); 2744 st_src_reg r = this->result; 2745 2746 l = st_dst_reg(current_function->return_reg); 2747 2748 for (i = 0; i < type_size(current_function->sig->return_type); i++) { 2749 emit(ir, TGSI_OPCODE_MOV, l, r); 2750 l.index++; 2751 r.index++; 2752 } 2753 } 2754 2755 emit(ir, TGSI_OPCODE_RET); 2756} 2757 2758void 2759glsl_to_tgsi_visitor::visit(ir_discard *ir) 2760{ 2761 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 2762 2763 if (ir->condition) { 2764 ir->condition->accept(this); 2765 this->result.negate = ~this->result.negate; 2766 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); 2767 } else { 2768 emit(ir, TGSI_OPCODE_KILP); 2769 } 2770 2771 fp->UsesKill = GL_TRUE; 2772} 2773 2774void 2775glsl_to_tgsi_visitor::visit(ir_if *ir) 2776{ 2777 glsl_to_tgsi_instruction *cond_inst, *if_inst; 2778 glsl_to_tgsi_instruction *prev_inst; 2779 2780 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2781 2782 ir->condition->accept(this); 2783 assert(this->result.file != PROGRAM_UNDEFINED); 2784 2785 if (this->options->EmitCondCodes) { 2786 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2787 2788 /* See if we actually generated any instruction for generating 2789 * the condition. 
If not, then cook up a move to a temp so we 2790 * have something to set cond_update on. 2791 */ 2792 if (cond_inst == prev_inst) { 2793 st_src_reg temp = get_temp(glsl_type::bool_type); 2794 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); 2795 } 2796 cond_inst->cond_update = GL_TRUE; 2797 2798 if_inst = emit(ir->condition, TGSI_OPCODE_IF); 2799 if_inst->dst.cond_mask = COND_NE; 2800 } else { 2801 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); 2802 } 2803 2804 this->instructions.push_tail(if_inst); 2805 2806 visit_exec_list(&ir->then_instructions, this); 2807 2808 if (!ir->else_instructions.is_empty()) { 2809 emit(ir->condition, TGSI_OPCODE_ELSE); 2810 visit_exec_list(&ir->else_instructions, this); 2811 } 2812 2813 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); 2814} 2815 2816glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 2817{ 2818 result.file = PROGRAM_UNDEFINED; 2819 next_temp = 1; 2820 next_signature_id = 1; 2821 num_immediates = 0; 2822 current_function = NULL; 2823 num_address_regs = 0; 2824 indirect_addr_temps = false; 2825 indirect_addr_consts = false; 2826 mem_ctx = ralloc_context(NULL); 2827} 2828 2829glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 2830{ 2831 ralloc_free(mem_ctx); 2832} 2833 2834extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 2835{ 2836 delete v; 2837} 2838 2839 2840/** 2841 * Count resources used by the given gpu program (number of texture 2842 * samplers, etc). 
2843 */ 2844static void 2845count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2846{ 2847 v->samplers_used = 0; 2848 2849 foreach_iter(exec_list_iterator, iter, v->instructions) { 2850 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2851 2852 if (is_tex_instruction(inst->op)) { 2853 v->samplers_used |= 1 << inst->sampler; 2854 2855 prog->SamplerTargets[inst->sampler] = 2856 (gl_texture_index)inst->tex_target; 2857 if (inst->tex_shadow) { 2858 prog->ShadowSamplers |= 1 << inst->sampler; 2859 } 2860 } 2861 } 2862 2863 prog->SamplersUsed = v->samplers_used; 2864 _mesa_update_shader_textures_used(prog); 2865} 2866 2867 2868/** 2869 * Check if the given vertex/fragment/shader program is within the 2870 * resource limits of the context (number of texture units, etc). 2871 * If any of those checks fail, record a linker error. 2872 * 2873 * XXX more checks are needed... 2874 */ 2875static void 2876check_resources(const struct gl_context *ctx, 2877 struct gl_shader_program *shader_program, 2878 glsl_to_tgsi_visitor *prog, 2879 struct gl_program *proginfo) 2880{ 2881 switch (proginfo->Target) { 2882 case GL_VERTEX_PROGRAM_ARB: 2883 if (_mesa_bitcount(prog->samplers_used) > 2884 ctx->Const.MaxVertexTextureImageUnits) { 2885 fail_link(shader_program, "Too many vertex shader texture samplers"); 2886 } 2887 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { 2888 fail_link(shader_program, "Too many vertex shader constants"); 2889 } 2890 break; 2891 case MESA_GEOMETRY_PROGRAM: 2892 if (_mesa_bitcount(prog->samplers_used) > 2893 ctx->Const.MaxGeometryTextureImageUnits) { 2894 fail_link(shader_program, "Too many geometry shader texture samplers"); 2895 } 2896 if (proginfo->Parameters->NumParameters > 2897 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { 2898 fail_link(shader_program, "Too many geometry shader constants"); 2899 } 2900 break; 2901 case GL_FRAGMENT_PROGRAM_ARB: 2902 if (_mesa_bitcount(prog->samplers_used) > 2903 
ctx->Const.MaxTextureImageUnits) { 2904 fail_link(shader_program, "Too many fragment shader texture samplers"); 2905 } 2906 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { 2907 fail_link(shader_program, "Too many fragment shader constants"); 2908 } 2909 break; 2910 default: 2911 _mesa_problem(ctx, "unexpected program type in check_resources()"); 2912 } 2913} 2914 2915 2916 2917struct uniform_sort { 2918 struct gl_uniform *u; 2919 int pos; 2920}; 2921 2922/* The shader_program->Uniforms list is almost sorted in increasing 2923 * uniform->{Frag,Vert}Pos locations, but not quite when there are 2924 * uniforms shared between targets. We need to add parameters in 2925 * increasing order for the targets. 2926 */ 2927static int 2928sort_uniforms(const void *a, const void *b) 2929{ 2930 struct uniform_sort *u1 = (struct uniform_sort *)a; 2931 struct uniform_sort *u2 = (struct uniform_sort *)b; 2932 2933 return u1->pos - u2->pos; 2934} 2935 2936/* Add the uniforms to the parameters. The linker chose locations 2937 * in our parameters lists (which weren't created yet), which the 2938 * uniforms code will use to poke values into our parameters list 2939 * when uniforms are updated. 
2940 */ 2941static void 2942add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, 2943 struct gl_shader *shader, 2944 struct gl_program *prog) 2945{ 2946 unsigned int i; 2947 unsigned int next_sampler = 0, num_uniforms = 0; 2948 struct uniform_sort *sorted_uniforms; 2949 2950 sorted_uniforms = ralloc_array(NULL, struct uniform_sort, 2951 shader_program->Uniforms->NumUniforms); 2952 2953 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { 2954 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; 2955 int parameter_index = -1; 2956 2957 switch (shader->Type) { 2958 case GL_VERTEX_SHADER: 2959 parameter_index = uniform->VertPos; 2960 break; 2961 case GL_FRAGMENT_SHADER: 2962 parameter_index = uniform->FragPos; 2963 break; 2964 case GL_GEOMETRY_SHADER: 2965 parameter_index = uniform->GeomPos; 2966 break; 2967 } 2968 2969 /* Only add uniforms used in our target. */ 2970 if (parameter_index != -1) { 2971 sorted_uniforms[num_uniforms].pos = parameter_index; 2972 sorted_uniforms[num_uniforms].u = uniform; 2973 num_uniforms++; 2974 } 2975 } 2976 2977 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), 2978 sort_uniforms); 2979 2980 for (i = 0; i < num_uniforms; i++) { 2981 struct gl_uniform *uniform = sorted_uniforms[i].u; 2982 int parameter_index = sorted_uniforms[i].pos; 2983 const glsl_type *type = uniform->Type; 2984 unsigned int size; 2985 2986 if (type->is_vector() || 2987 type->is_scalar()) { 2988 size = type->vector_elements; 2989 } else { 2990 size = type_size(type) * 4; 2991 } 2992 2993 gl_register_file file; 2994 if (type->is_sampler() || 2995 (type->is_array() && type->fields.array->is_sampler())) { 2996 file = PROGRAM_SAMPLER; 2997 } else { 2998 file = PROGRAM_UNIFORM; 2999 } 3000 3001 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, 3002 uniform->Name); 3003 3004 if (index < 0) { 3005 index = _mesa_add_parameter(prog->Parameters, file, 3006 uniform->Name, size, type->gl_type, 3007 
NULL, NULL, 0x0); 3008 3009 /* Sampler uniform values are stored in prog->SamplerUnits, 3010 * and the entry in that array is selected by this index we 3011 * store in ParameterValues[]. 3012 */ 3013 if (file == PROGRAM_SAMPLER) { 3014 for (unsigned int j = 0; j < size / 4; j++) 3015 prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; 3016 } 3017 3018 /* The location chosen in the Parameters list here (returned 3019 * from _mesa_add_uniform) has to match what the linker chose. 3020 */ 3021 if (index != parameter_index) { 3022 fail_link(shader_program, "Allocation of uniform `%s' to target " 3023 "failed (%d vs %d)\n", 3024 uniform->Name, index, parameter_index); 3025 } 3026 } 3027 } 3028 3029 ralloc_free(sorted_uniforms); 3030} 3031 3032static void 3033set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 3034 struct gl_shader_program *shader_program, 3035 const char *name, const glsl_type *type, 3036 ir_constant *val) 3037{ 3038 if (type->is_record()) { 3039 ir_constant *field_constant; 3040 3041 field_constant = (ir_constant *)val->components.get_head(); 3042 3043 for (unsigned int i = 0; i < type->length; i++) { 3044 const glsl_type *field_type = type->fields.structure[i].type; 3045 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 3046 type->fields.structure[i].name); 3047 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 3048 field_type, field_constant); 3049 field_constant = (ir_constant *)field_constant->next; 3050 } 3051 return; 3052 } 3053 3054 int loc = _mesa_get_uniform_location(ctx, shader_program, name); 3055 3056 if (loc == -1) { 3057 fail_link(shader_program, 3058 "Couldn't find uniform for initializer %s\n", name); 3059 return; 3060 } 3061 3062 for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { 3063 ir_constant *element; 3064 const glsl_type *element_type; 3065 if (type->is_array()) { 3066 element = val->array_elements[i]; 3067 element_type = type->fields.array; 3068 } else { 3069 element = val; 3070 element_type = type; 3071 } 3072 3073 void *values; 3074 3075 if (element_type->base_type == GLSL_TYPE_BOOL) { 3076 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 3077 for (unsigned int j = 0; j < element_type->components(); j++) { 3078 conv[j] = element->value.b[j]; 3079 } 3080 values = (void *)conv; 3081 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 3082 element_type->vector_elements, 3083 1); 3084 } else { 3085 values = &element->value; 3086 } 3087 3088 if (element_type->is_matrix()) { 3089 _mesa_uniform_matrix(ctx, shader_program, 3090 element_type->matrix_columns, 3091 element_type->vector_elements, 3092 loc, 1, GL_FALSE, (GLfloat *)values); 3093 loc += element_type->matrix_columns; 3094 } else { 3095 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 3096 values, element_type->gl_type); 3097 loc += type_size(element_type); 3098 } 3099 } 3100} 3101 3102/* 3103 * Scan/rewrite program to remove reads of custom (output) registers. 3104 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING 3105 * (for vertex shaders). 3106 * In GLSL shaders, varying vars can be read and written. 3107 * On some hardware, trying to read an output register causes trouble. 3108 * So, rewrite the program to use a temporary register in this case. 3109 * 3110 * Based on _mesa_remove_output_reads from programopt.c. 
3111 */ 3112void 3113glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) 3114{ 3115 GLuint i; 3116 GLint outputMap[VERT_RESULT_MAX]; 3117 GLint outputTypes[VERT_RESULT_MAX]; 3118 GLuint numVaryingReads = 0; 3119 GLboolean usedTemps[MAX_TEMPS]; 3120 GLuint firstTemp = 0; 3121 3122 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, 3123 usedTemps, MAX_TEMPS); 3124 3125 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); 3126 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); 3127 3128 for (i = 0; i < VERT_RESULT_MAX; i++) 3129 outputMap[i] = -1; 3130 3131 /* look for instructions which read from varying vars */ 3132 foreach_iter(exec_list_iterator, iter, this->instructions) { 3133 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3134 const GLuint numSrc = num_inst_src_regs(inst->op); 3135 GLuint j; 3136 for (j = 0; j < numSrc; j++) { 3137 if (inst->src[j].file == type) { 3138 /* replace the read with a temp reg */ 3139 const GLuint var = inst->src[j].index; 3140 if (outputMap[var] == -1) { 3141 numVaryingReads++; 3142 outputMap[var] = _mesa_find_free_register(usedTemps, 3143 MAX_TEMPS, 3144 firstTemp); 3145 outputTypes[var] = inst->src[j].type; 3146 firstTemp = outputMap[var] + 1; 3147 } 3148 inst->src[j].file = PROGRAM_TEMPORARY; 3149 inst->src[j].index = outputMap[var]; 3150 } 3151 } 3152 } 3153 3154 if (numVaryingReads == 0) 3155 return; /* nothing to be done */ 3156 3157 /* look for instructions which write to the varying vars identified above */ 3158 foreach_iter(exec_list_iterator, iter, this->instructions) { 3159 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3160 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { 3161 /* change inst to write to the temp reg, instead of the varying */ 3162 inst->dst.file = PROGRAM_TEMPORARY; 3163 inst->dst.index = outputMap[inst->dst.index]; 3164 } 3165 } 3166 3167 /* insert new MOV instructions at the end */ 3168 for (i 
= 0; i < VERT_RESULT_MAX; i++) { 3169 if (outputMap[i] >= 0) { 3170 /* MOV VAR[i], TEMP[tmp]; */ 3171 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); 3172 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); 3173 dst.index = i; 3174 this->emit(NULL, TGSI_OPCODE_MOV, dst, src); 3175 } 3176 } 3177} 3178 3179/** 3180 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 3181 * are read from the given src in this instruction 3182 */ 3183static int 3184get_src_arg_mask(st_dst_reg dst, st_src_reg src) 3185{ 3186 int read_mask = 0, comp; 3187 3188 /* Now, given the src swizzle and the written channels, find which 3189 * components are actually read 3190 */ 3191 for (comp = 0; comp < 4; ++comp) { 3192 const unsigned coord = GET_SWZ(src.swizzle, comp); 3193 ASSERT(coord < 4); 3194 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 3195 read_mask |= 1 << coord; 3196 } 3197 3198 return read_mask; 3199} 3200 3201/** 3202 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 3203 * instruction is the first instruction to write to register T0. There are 3204 * several lowering passes done in GLSL IR (e.g. branches and 3205 * relative addressing) that create a large number of conditional assignments 3206 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 3207 * 3208 * Here is why this conversion is safe: 3209 * CMP T0, T1 T2 T0 can be expanded to: 3210 * if (T1 < 0.0) 3211 * MOV T0, T2; 3212 * else 3213 * MOV T0, T0; 3214 * 3215 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 3216 * as the original program. If (T1 < 0.0) evaluates to false, executing 3217 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 3218 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 3219 * because any instruction that was going to read from T0 after this was going 3220 * to read a garbage value anyway. 
3221 */ 3222void 3223glsl_to_tgsi_visitor::simplify_cmp(void) 3224{ 3225 unsigned tempWrites[MAX_TEMPS]; 3226 unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; 3227 3228 memset(tempWrites, 0, sizeof(tempWrites)); 3229 memset(outputWrites, 0, sizeof(outputWrites)); 3230 3231 foreach_iter(exec_list_iterator, iter, this->instructions) { 3232 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3233 unsigned prevWriteMask = 0; 3234 3235 /* Give up if we encounter relative addressing or flow control. */ 3236 if (inst->dst.reladdr || 3237 tgsi_get_opcode_info(inst->op)->is_branch || 3238 inst->op == TGSI_OPCODE_BGNSUB || 3239 inst->op == TGSI_OPCODE_CONT || 3240 inst->op == TGSI_OPCODE_END || 3241 inst->op == TGSI_OPCODE_ENDSUB || 3242 inst->op == TGSI_OPCODE_RET) { 3243 return; 3244 } 3245 3246 if (inst->dst.file == PROGRAM_OUTPUT) { 3247 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); 3248 prevWriteMask = outputWrites[inst->dst.index]; 3249 outputWrites[inst->dst.index] |= inst->dst.writemask; 3250 } else if (inst->dst.file == PROGRAM_TEMPORARY) { 3251 assert(inst->dst.index < MAX_TEMPS); 3252 prevWriteMask = tempWrites[inst->dst.index]; 3253 tempWrites[inst->dst.index] |= inst->dst.writemask; 3254 } 3255 3256 /* For a CMP to be considered a conditional write, the destination 3257 * register and source register two must be the same. */ 3258 if (inst->op == TGSI_OPCODE_CMP 3259 && !(inst->dst.writemask & prevWriteMask) 3260 && inst->src[2].file == inst->dst.file 3261 && inst->src[2].index == inst->dst.index 3262 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { 3263 3264 inst->op = TGSI_OPCODE_MOV; 3265 inst->src[0] = inst->src[1]; 3266 } 3267 } 3268} 3269 3270/* Replaces all references to a temporary register index with another index. 
*/ 3271void 3272glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) 3273{ 3274 foreach_iter(exec_list_iterator, iter, this->instructions) { 3275 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3276 unsigned j; 3277 3278 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3279 if (inst->src[j].file == PROGRAM_TEMPORARY && 3280 inst->src[j].index == index) { 3281 inst->src[j].index = new_index; 3282 } 3283 } 3284 3285 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3286 inst->dst.index = new_index; 3287 } 3288 } 3289} 3290 3291int 3292glsl_to_tgsi_visitor::get_first_temp_read(int index) 3293{ 3294 int depth = 0; /* loop depth */ 3295 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3296 unsigned i = 0, j; 3297 3298 foreach_iter(exec_list_iterator, iter, this->instructions) { 3299 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3300 3301 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3302 if (inst->src[j].file == PROGRAM_TEMPORARY && 3303 inst->src[j].index == index) { 3304 return (depth == 0) ? i : loop_start; 3305 } 3306 } 3307 3308 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3309 if(depth++ == 0) 3310 loop_start = i; 3311 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3312 if (--depth == 0) 3313 loop_start = -1; 3314 } 3315 assert(depth >= 0); 3316 3317 i++; 3318 } 3319 3320 return -1; 3321} 3322 3323int 3324glsl_to_tgsi_visitor::get_first_temp_write(int index) 3325{ 3326 int depth = 0; /* loop depth */ 3327 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3328 int i = 0; 3329 3330 foreach_iter(exec_list_iterator, iter, this->instructions) { 3331 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3332 3333 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3334 return (depth == 0) ? 
i : loop_start; 3335 } 3336 3337 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3338 if(depth++ == 0) 3339 loop_start = i; 3340 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3341 if (--depth == 0) 3342 loop_start = -1; 3343 } 3344 assert(depth >= 0); 3345 3346 i++; 3347 } 3348 3349 return -1; 3350} 3351 3352int 3353glsl_to_tgsi_visitor::get_last_temp_read(int index) 3354{ 3355 int depth = 0; /* loop depth */ 3356 int last = -1; /* index of last instruction that reads the temporary */ 3357 unsigned i = 0, j; 3358 3359 foreach_iter(exec_list_iterator, iter, this->instructions) { 3360 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3361 3362 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3363 if (inst->src[j].file == PROGRAM_TEMPORARY && 3364 inst->src[j].index == index) { 3365 last = (depth == 0) ? i : -2; 3366 } 3367 } 3368 3369 if (inst->op == TGSI_OPCODE_BGNLOOP) 3370 depth++; 3371 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3372 if (--depth == 0 && last == -2) 3373 last = i; 3374 assert(depth >= 0); 3375 3376 i++; 3377 } 3378 3379 assert(last >= -1); 3380 return last; 3381} 3382 3383int 3384glsl_to_tgsi_visitor::get_last_temp_write(int index) 3385{ 3386 int depth = 0; /* loop depth */ 3387 int last = -1; /* index of last instruction that writes to the temporary */ 3388 int i = 0; 3389 3390 foreach_iter(exec_list_iterator, iter, this->instructions) { 3391 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3392 3393 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) 3394 last = (depth == 0) ? 
i : -2; 3395 3396 if (inst->op == TGSI_OPCODE_BGNLOOP) 3397 depth++; 3398 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3399 if (--depth == 0 && last == -2) 3400 last = i; 3401 assert(depth >= 0); 3402 3403 i++; 3404 } 3405 3406 assert(last >= -1); 3407 return last; 3408} 3409 3410/* 3411 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 3412 * channels for copy propagation and updates following instructions to 3413 * use the original versions. 3414 * 3415 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3416 * will occur. As an example, a TXP production before this pass: 3417 * 3418 * 0: MOV TEMP[1], INPUT[4].xyyy; 3419 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3420 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 3421 * 3422 * and after: 3423 * 3424 * 0: MOV TEMP[1], INPUT[4].xyyy; 3425 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3426 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3427 * 3428 * which allows for dead code elimination on TEMP[1]'s writes. 3429 */ 3430void 3431glsl_to_tgsi_visitor::copy_propagate(void) 3432{ 3433 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 3434 glsl_to_tgsi_instruction *, 3435 this->next_temp * 4); 3436 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3437 int level = 0; 3438 3439 foreach_iter(exec_list_iterator, iter, this->instructions) { 3440 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3441 3442 assert(inst->dst.file != PROGRAM_TEMPORARY 3443 || inst->dst.index < this->next_temp); 3444 3445 /* First, do any copy propagation possible into the src regs. 
*/ 3446 for (int r = 0; r < 3; r++) { 3447 glsl_to_tgsi_instruction *first = NULL; 3448 bool good = true; 3449 int acp_base = inst->src[r].index * 4; 3450 3451 if (inst->src[r].file != PROGRAM_TEMPORARY || 3452 inst->src[r].reladdr) 3453 continue; 3454 3455 /* See if we can find entries in the ACP consisting of MOVs 3456 * from the same src register for all the swizzled channels 3457 * of this src register reference. 3458 */ 3459 for (int i = 0; i < 4; i++) { 3460 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3461 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 3462 3463 if (!copy_chan) { 3464 good = false; 3465 break; 3466 } 3467 3468 assert(acp_level[acp_base + src_chan] <= level); 3469 3470 if (!first) { 3471 first = copy_chan; 3472 } else { 3473 if (first->src[0].file != copy_chan->src[0].file || 3474 first->src[0].index != copy_chan->src[0].index) { 3475 good = false; 3476 break; 3477 } 3478 } 3479 } 3480 3481 if (good) { 3482 /* We've now validated that we can copy-propagate to 3483 * replace this src register reference. Do it. 3484 */ 3485 inst->src[r].file = first->src[0].file; 3486 inst->src[r].index = first->src[0].index; 3487 3488 int swizzle = 0; 3489 for (int i = 0; i < 4; i++) { 3490 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3491 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 3492 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 3493 (3 * i)); 3494 } 3495 inst->src[r].swizzle = swizzle; 3496 } 3497 } 3498 3499 switch (inst->op) { 3500 case TGSI_OPCODE_BGNLOOP: 3501 case TGSI_OPCODE_ENDLOOP: 3502 /* End of a basic block, clear the ACP entirely. */ 3503 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3504 break; 3505 3506 case TGSI_OPCODE_IF: 3507 ++level; 3508 break; 3509 3510 case TGSI_OPCODE_ENDIF: 3511 case TGSI_OPCODE_ELSE: 3512 /* Clear all channels written inside the block from the ACP, but 3513 * leaving those that were not touched. 
3514 */ 3515 for (int r = 0; r < this->next_temp; r++) { 3516 for (int c = 0; c < 4; c++) { 3517 if (!acp[4 * r + c]) 3518 continue; 3519 3520 if (acp_level[4 * r + c] >= level) 3521 acp[4 * r + c] = NULL; 3522 } 3523 } 3524 if (inst->op == TGSI_OPCODE_ENDIF) 3525 --level; 3526 break; 3527 3528 default: 3529 /* Continuing the block, clear any written channels from 3530 * the ACP. 3531 */ 3532 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 3533 /* Any temporary might be written, so no copy propagation 3534 * across this instruction. 3535 */ 3536 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3537 } else if (inst->dst.file == PROGRAM_OUTPUT && 3538 inst->dst.reladdr) { 3539 /* Any output might be written, so no copy propagation 3540 * from outputs across this instruction. 3541 */ 3542 for (int r = 0; r < this->next_temp; r++) { 3543 for (int c = 0; c < 4; c++) { 3544 if (!acp[4 * r + c]) 3545 continue; 3546 3547 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 3548 acp[4 * r + c] = NULL; 3549 } 3550 } 3551 } else if (inst->dst.file == PROGRAM_TEMPORARY || 3552 inst->dst.file == PROGRAM_OUTPUT) { 3553 /* Clear where it's used as dst. */ 3554 if (inst->dst.file == PROGRAM_TEMPORARY) { 3555 for (int c = 0; c < 4; c++) { 3556 if (inst->dst.writemask & (1 << c)) { 3557 acp[4 * inst->dst.index + c] = NULL; 3558 } 3559 } 3560 } 3561 3562 /* Clear where it's used as src. */ 3563 for (int r = 0; r < this->next_temp; r++) { 3564 for (int c = 0; c < 4; c++) { 3565 if (!acp[4 * r + c]) 3566 continue; 3567 3568 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 3569 3570 if (acp[4 * r + c]->src[0].file == inst->dst.file && 3571 acp[4 * r + c]->src[0].index == inst->dst.index && 3572 inst->dst.writemask & (1 << src_chan)) 3573 { 3574 acp[4 * r + c] = NULL; 3575 } 3576 } 3577 } 3578 } 3579 break; 3580 } 3581 3582 /* If this is a copy, add it to the ACP. 
*/ 3583 if (inst->op == TGSI_OPCODE_MOV && 3584 inst->dst.file == PROGRAM_TEMPORARY && 3585 !inst->dst.reladdr && 3586 !inst->saturate && 3587 !inst->src[0].reladdr && 3588 !inst->src[0].negate) { 3589 for (int i = 0; i < 4; i++) { 3590 if (inst->dst.writemask & (1 << i)) { 3591 acp[4 * inst->dst.index + i] = inst; 3592 acp_level[4 * inst->dst.index + i] = level; 3593 } 3594 } 3595 } 3596 } 3597 3598 ralloc_free(acp_level); 3599 ralloc_free(acp); 3600} 3601 3602/* 3603 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. 3604 * 3605 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3606 * will occur. As an example, a TXP production after copy propagation but 3607 * before this pass: 3608 * 3609 * 0: MOV TEMP[1], INPUT[4].xyyy; 3610 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3611 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3612 * 3613 * and after this pass: 3614 * 3615 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3616 * 3617 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) 3618 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them 3619 */ 3620void 3621glsl_to_tgsi_visitor::eliminate_dead_code(void) 3622{ 3623 int i; 3624 3625 for (i=0; i < this->next_temp; i++) { 3626 int last_read = get_last_temp_read(i); 3627 int j = 0; 3628 3629 foreach_iter(exec_list_iterator, iter, this->instructions) { 3630 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3631 3632 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && 3633 j > last_read) 3634 { 3635 iter.remove(); 3636 delete inst; 3637 } 3638 3639 j++; 3640 } 3641 } 3642} 3643 3644/* 3645 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 3646 * code elimination. This is less primitive than eliminate_dead_code(), as it 3647 * is per-channel and can detect consecutive writes without a read between them 3648 * as dead code. 
However, there is some dead code that can be eliminated by 3649 * eliminate_dead_code() but not this function - for example, this function 3650 * cannot eliminate an instruction writing to a register that is never read and 3651 * is the only instruction writing to that register. 3652 * 3653 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3654 * will occur. 3655 */ 3656int 3657glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) 3658{ 3659 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, 3660 glsl_to_tgsi_instruction *, 3661 this->next_temp * 4); 3662 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3663 int level = 0; 3664 int removed = 0; 3665 3666 foreach_iter(exec_list_iterator, iter, this->instructions) { 3667 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3668 3669 assert(inst->dst.file != PROGRAM_TEMPORARY 3670 || inst->dst.index < this->next_temp); 3671 3672 switch (inst->op) { 3673 case TGSI_OPCODE_BGNLOOP: 3674 case TGSI_OPCODE_ENDLOOP: 3675 /* End of a basic block, clear the write array entirely. 3676 * FIXME: This keeps us from killing dead code when the writes are 3677 * on either side of a loop, even when the register isn't touched 3678 * inside the loop. 3679 */ 3680 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3681 break; 3682 3683 case TGSI_OPCODE_ENDIF: 3684 --level; 3685 break; 3686 3687 case TGSI_OPCODE_ELSE: 3688 /* Clear all channels written inside the preceding if block from the 3689 * write array, but leave those that were not touched. 3690 * 3691 * FIXME: This destroys opportunities to remove dead code inside of 3692 * IF blocks that are followed by an ELSE block. 
3693 */ 3694 for (int r = 0; r < this->next_temp; r++) { 3695 for (int c = 0; c < 4; c++) { 3696 if (!writes[4 * r + c]) 3697 continue; 3698 3699 if (write_level[4 * r + c] >= level) 3700 writes[4 * r + c] = NULL; 3701 } 3702 } 3703 break; 3704 3705 case TGSI_OPCODE_IF: 3706 ++level; 3707 /* fallthrough to default case to mark the condition as read */ 3708 3709 default: 3710 /* Continuing the block, clear any channels from the write array that 3711 * are read by this instruction. 3712 */ 3713 for (unsigned i = 0; i < Elements(inst->src); i++) { 3714 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ 3715 /* Any temporary might be read, so no dead code elimination 3716 * across this instruction. 3717 */ 3718 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3719 } else if (inst->src[i].file == PROGRAM_TEMPORARY) { 3720 /* Clear where it's used as src. */ 3721 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); 3722 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); 3723 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); 3724 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); 3725 3726 for (int c = 0; c < 4; c++) { 3727 if (src_chans & (1 << c)) { 3728 writes[4 * inst->src[i].index + c] = NULL; 3729 } 3730 } 3731 } 3732 } 3733 break; 3734 } 3735 3736 /* If this instruction writes to a temporary, add it to the write array. 3737 * If there is already an instruction in the write array for one or more 3738 * of the channels, flag that channel write as dead. 
3739 */ 3740 if (inst->dst.file == PROGRAM_TEMPORARY && 3741 !inst->dst.reladdr && 3742 !inst->saturate) { 3743 for (int c = 0; c < 4; c++) { 3744 if (inst->dst.writemask & (1 << c)) { 3745 if (writes[4 * inst->dst.index + c]) { 3746 if (write_level[4 * inst->dst.index + c] < level) 3747 continue; 3748 else 3749 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); 3750 } 3751 writes[4 * inst->dst.index + c] = inst; 3752 write_level[4 * inst->dst.index + c] = level; 3753 } 3754 } 3755 } 3756 } 3757 3758 /* Anything still in the write array at this point is dead code. */ 3759 for (int r = 0; r < this->next_temp; r++) { 3760 for (int c = 0; c < 4; c++) { 3761 glsl_to_tgsi_instruction *inst = writes[4 * r + c]; 3762 if (inst) 3763 inst->dead_mask |= (1 << c); 3764 } 3765 } 3766 3767 /* Now actually remove the instructions that are completely dead and update 3768 * the writemask of other instructions with dead channels. 3769 */ 3770 foreach_iter(exec_list_iterator, iter, this->instructions) { 3771 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3772 3773 if (!inst->dead_mask || !inst->dst.writemask) 3774 continue; 3775 else if (inst->dead_mask == inst->dst.writemask) { 3776 iter.remove(); 3777 delete inst; 3778 removed++; 3779 } else 3780 inst->dst.writemask &= ~(inst->dead_mask); 3781 } 3782 3783 ralloc_free(write_level); 3784 ralloc_free(writes); 3785 3786 return removed; 3787} 3788 3789/* Merges temporary registers together where possible to reduce the number of 3790 * registers needed to run a program. 3791 * 3792 * Produces optimal code only after copy propagation and dead code elimination 3793 * have been run. 
*/ 3794void 3795glsl_to_tgsi_visitor::merge_registers(void) 3796{ 3797 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); 3798 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); 3799 int i, j; 3800 3801 /* Read the indices of the last read and first write to each temp register 3802 * into an array so that we don't have to traverse the instruction list as 3803 * much. */ 3804 for (i=0; i < this->next_temp; i++) { 3805 last_reads[i] = get_last_temp_read(i); 3806 first_writes[i] = get_first_temp_write(i); 3807 } 3808 3809 /* Start looking for registers with non-overlapping usages that can be 3810 * merged together. */ 3811 for (i=0; i < this->next_temp; i++) { 3812 /* Don't touch unused registers. */ 3813 if (last_reads[i] < 0 || first_writes[i] < 0) continue; 3814 3815 for (j=0; j < this->next_temp; j++) { 3816 /* Don't touch unused registers. */ 3817 if (last_reads[j] < 0 || first_writes[j] < 0) continue; 3818 3819 /* We can merge the two registers if the first write to j is after or 3820 * in the same instruction as the last read from i. Note that the 3821 * register at index i will always be used earlier or at the same time 3822 * as the register at index j. */ 3823 if (first_writes[i] <= first_writes[j] && 3824 last_reads[i] <= first_writes[j]) 3825 { 3826 rename_temp_register(j, i); /* Replace all references to j with i.*/ 3827 3828 /* Update the first_writes and last_reads arrays with the new 3829 * values for the merged register index, and mark the newly unused 3830 * register index as such. */ 3831 last_reads[i] = last_reads[j]; 3832 first_writes[j] = -1; 3833 last_reads[j] = -1; 3834 } 3835 } 3836 } 3837 3838 ralloc_free(last_reads); 3839 ralloc_free(first_writes); 3840} 3841 3842/* Reassign indices to temporary registers by reusing unused indices created 3843 * by optimization passes. 
*/ 3844void 3845glsl_to_tgsi_visitor::renumber_registers(void) 3846{ 3847 int i = 0; 3848 int new_index = 0; 3849 3850 for (i=0; i < this->next_temp; i++) { 3851 if (get_first_temp_read(i) < 0) continue; 3852 if (i != new_index) 3853 rename_temp_register(i, new_index); 3854 new_index++; 3855 } 3856 3857 this->next_temp = new_index; 3858} 3859 3860/** 3861 * Returns a fragment program which implements the current pixel transfer ops. 3862 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. 3863 */ 3864extern "C" void 3865get_pixel_transfer_visitor(struct st_fragment_program *fp, 3866 glsl_to_tgsi_visitor *original, 3867 int scale_and_bias, int pixel_maps) 3868{ 3869 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3870 struct st_context *st = st_context(original->ctx); 3871 struct gl_program *prog = &fp->Base.Base; 3872 struct gl_program_parameter_list *params = _mesa_new_parameter_list(); 3873 st_src_reg coord, src0; 3874 st_dst_reg dst0; 3875 glsl_to_tgsi_instruction *inst; 3876 3877 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ 3878 v->ctx = original->ctx; 3879 v->prog = prog; 3880 v->glsl_version = original->glsl_version; 3881 v->native_integers = original->native_integers; 3882 v->options = original->options; 3883 v->next_temp = original->next_temp; 3884 v->num_address_regs = original->num_address_regs; 3885 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3886 v->indirect_addr_temps = original->indirect_addr_temps; 3887 v->indirect_addr_consts = original->indirect_addr_consts; 3888 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3889 3890 /* 3891 * Get initial pixel color from the texture. 
3892 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; 3893 */ 3894 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3895 src0 = v->get_temp(glsl_type::vec4_type); 3896 dst0 = st_dst_reg(src0); 3897 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3898 inst->sampler = 0; 3899 inst->tex_target = TEXTURE_2D_INDEX; 3900 3901 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); 3902 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ 3903 v->samplers_used |= (1 << 0); 3904 3905 if (scale_and_bias) { 3906 static const gl_state_index scale_state[STATE_LENGTH] = 3907 { STATE_INTERNAL, STATE_PT_SCALE, 3908 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3909 static const gl_state_index bias_state[STATE_LENGTH] = 3910 { STATE_INTERNAL, STATE_PT_BIAS, 3911 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3912 GLint scale_p, bias_p; 3913 st_src_reg scale, bias; 3914 3915 scale_p = _mesa_add_state_reference(params, scale_state); 3916 bias_p = _mesa_add_state_reference(params, bias_state); 3917 3918 /* MAD colorTemp, colorTemp, scale, bias; */ 3919 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); 3920 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); 3921 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); 3922 } 3923 3924 if (pixel_maps) { 3925 st_src_reg temp = v->get_temp(glsl_type::vec4_type); 3926 st_dst_reg temp_dst = st_dst_reg(temp); 3927 3928 assert(st->pixel_xfer.pixelmap_texture); 3929 3930 /* With a little effort, we can do four pixel map look-ups with 3931 * two TEX instructions: 3932 */ 3933 3934 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ 3935 temp_dst.writemask = WRITEMASK_XY; /* write R,G */ 3936 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3937 inst->sampler = 1; 3938 inst->tex_target = TEXTURE_2D_INDEX; 3939 3940 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ 3941 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); 
3942 temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ 3943 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3944 inst->sampler = 1; 3945 inst->tex_target = TEXTURE_2D_INDEX; 3946 3947 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ 3948 v->samplers_used |= (1 << 1); 3949 3950 /* MOV colorTemp, temp; */ 3951 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); 3952 } 3953 3954 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3955 * new visitor. */ 3956 foreach_iter(exec_list_iterator, iter, original->instructions) { 3957 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3958 st_src_reg src_regs[3]; 3959 3960 if (inst->dst.file == PROGRAM_OUTPUT) 3961 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3962 3963 for (int i=0; i<3; i++) { 3964 src_regs[i] = inst->src[i]; 3965 if (src_regs[i].file == PROGRAM_INPUT && 3966 src_regs[i].index == FRAG_ATTRIB_COL0) 3967 { 3968 src_regs[i].file = PROGRAM_TEMPORARY; 3969 src_regs[i].index = src0.index; 3970 } 3971 else if (src_regs[i].file == PROGRAM_INPUT) 3972 prog->InputsRead |= (1 << src_regs[i].index); 3973 } 3974 3975 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3976 } 3977 3978 /* Make modifications to fragment program info. */ 3979 prog->Parameters = _mesa_combine_parameter_lists(params, 3980 original->prog->Parameters); 3981 _mesa_free_parameter_list(params); 3982 count_resources(v, prog); 3983 fp->glsl_to_tgsi = v; 3984} 3985 3986/** 3987 * Make fragment program for glBitmap: 3988 * Sample the texture and kill the fragment if the bit is 0. 3989 * This program will be combined with the user's fragment program. 3990 * 3991 * Based on make_bitmap_fragment_program in st_cb_bitmap.c. 
3992 */ 3993extern "C" void 3994get_bitmap_visitor(struct st_fragment_program *fp, 3995 glsl_to_tgsi_visitor *original, int samplerIndex) 3996{ 3997 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3998 struct st_context *st = st_context(original->ctx); 3999 struct gl_program *prog = &fp->Base.Base; 4000 st_src_reg coord, src0; 4001 st_dst_reg dst0; 4002 glsl_to_tgsi_instruction *inst; 4003 4004 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ 4005 v->ctx = original->ctx; 4006 v->prog = prog; 4007 v->glsl_version = original->glsl_version; 4008 v->native_integers = original->native_integers; 4009 v->options = original->options; 4010 v->next_temp = original->next_temp; 4011 v->num_address_regs = original->num_address_regs; 4012 v->samplers_used = prog->SamplersUsed = original->samplers_used; 4013 v->indirect_addr_temps = original->indirect_addr_temps; 4014 v->indirect_addr_consts = original->indirect_addr_consts; 4015 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 4016 4017 /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ 4018 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 4019 src0 = v->get_temp(glsl_type::vec4_type); 4020 dst0 = st_dst_reg(src0); 4021 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 4022 inst->sampler = samplerIndex; 4023 inst->tex_target = TEXTURE_2D_INDEX; 4024 4025 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); 4026 prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ 4027 v->samplers_used |= (1 << samplerIndex); 4028 4029 /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ 4030 src0.negate = NEGATE_XYZW; 4031 if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) 4032 src0.swizzle = SWIZZLE_XXXX; 4033 inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); 4034 4035 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 4036 * new visitor. 
*/ 4037 foreach_iter(exec_list_iterator, iter, original->instructions) { 4038 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 4039 st_src_reg src_regs[3]; 4040 4041 if (inst->dst.file == PROGRAM_OUTPUT) 4042 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 4043 4044 for (int i=0; i<3; i++) { 4045 src_regs[i] = inst->src[i]; 4046 if (src_regs[i].file == PROGRAM_INPUT) 4047 prog->InputsRead |= (1 << src_regs[i].index); 4048 } 4049 4050 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 4051 } 4052 4053 /* Make modifications to fragment program info. */ 4054 prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); 4055 count_resources(v, prog); 4056 fp->glsl_to_tgsi = v; 4057} 4058 4059/* ------------------------- TGSI conversion stuff -------------------------- */ 4060struct label { 4061 unsigned branch_target; 4062 unsigned token; 4063}; 4064 4065/** 4066 * Intermediate state used during shader translation. 4067 */ 4068struct st_translate { 4069 struct ureg_program *ureg; 4070 4071 struct ureg_dst temps[MAX_TEMPS]; 4072 struct ureg_src *constants; 4073 struct ureg_src *immediates; 4074 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 4075 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 4076 struct ureg_dst address[1]; 4077 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 4078 struct ureg_src systemValues[SYSTEM_VALUE_MAX]; 4079 4080 /* Extra info for handling point size clamping in vertex shader */ 4081 struct ureg_dst pointSizeResult; /**< Actual point size output register */ 4082 struct ureg_src pointSizeConst; /**< Point size range constant register */ 4083 GLint pointSizeOutIndex; /**< Temp point size output register */ 4084 GLboolean prevInstWrotePointSize; 4085 4086 const GLuint *inputMapping; 4087 const GLuint *outputMapping; 4088 4089 /* For every instruction that contains a label (eg CALL), keep 4090 * details so that we can go back afterwards and emit the correct 4091 * tgsi 
instruction number for each label. 4092 */ 4093 struct label *labels; 4094 unsigned labels_size; 4095 unsigned labels_count; 4096 4097 /* Keep a record of the tgsi instruction number that each mesa 4098 * instruction starts at, will be used to fix up labels after 4099 * translation. 4100 */ 4101 unsigned *insn; 4102 unsigned insn_size; 4103 unsigned insn_count; 4104 4105 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 4106 4107 boolean error; 4108}; 4109 4110/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ 4111static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { 4112 TGSI_SEMANTIC_FACE, 4113 TGSI_SEMANTIC_INSTANCEID 4114}; 4115 4116/** 4117 * Make note of a branch to a label in the TGSI code. 4118 * After we've emitted all instructions, we'll go over the list 4119 * of labels built here and patch the TGSI code with the actual 4120 * location of each label. 4121 */ 4122static unsigned *get_label(struct st_translate *t, unsigned branch_target) 4123{ 4124 unsigned i; 4125 4126 if (t->labels_count + 1 >= t->labels_size) { 4127 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 4128 t->labels = (struct label *)realloc(t->labels, 4129 t->labels_size * sizeof(struct label)); 4130 if (t->labels == NULL) { 4131 static unsigned dummy; 4132 t->error = TRUE; 4133 return &dummy; 4134 } 4135 } 4136 4137 i = t->labels_count++; 4138 t->labels[i].branch_target = branch_target; 4139 return &t->labels[i].token; 4140} 4141 4142/** 4143 * Called prior to emitting the TGSI code for each instruction. 4144 * Allocate additional space for instructions if needed. 4145 * Update the insn[] array so the next glsl_to_tgsi_instruction points to 4146 * the next TGSI instruction. 
4147 */ 4148static void set_insn_start(struct st_translate *t, unsigned start) 4149{ 4150 if (t->insn_count + 1 >= t->insn_size) { 4151 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 4152 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); 4153 if (t->insn == NULL) { 4154 t->error = TRUE; 4155 return; 4156 } 4157 } 4158 4159 t->insn[t->insn_count++] = start; 4160} 4161 4162/** 4163 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 4164 */ 4165static struct ureg_src 4166emit_immediate(struct st_translate *t, 4167 gl_constant_value values[4], 4168 int type, int size) 4169{ 4170 struct ureg_program *ureg = t->ureg; 4171 4172 switch(type) 4173 { 4174 case GL_FLOAT: 4175 return ureg_DECL_immediate(ureg, &values[0].f, size); 4176 case GL_INT: 4177 return ureg_DECL_immediate_int(ureg, &values[0].i, size); 4178 case GL_UNSIGNED_INT: 4179 case GL_BOOL: 4180 return ureg_DECL_immediate_uint(ureg, &values[0].u, size); 4181 default: 4182 assert(!"should not get here - type must be float, int, uint, or bool"); 4183 return ureg_src_undef(); 4184 } 4185} 4186 4187/** 4188 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. 
4189 */ 4190static struct ureg_dst 4191dst_register(struct st_translate *t, 4192 gl_register_file file, 4193 GLuint index) 4194{ 4195 switch(file) { 4196 case PROGRAM_UNDEFINED: 4197 return ureg_dst_undef(); 4198 4199 case PROGRAM_TEMPORARY: 4200 if (ureg_dst_is_undef(t->temps[index])) 4201 t->temps[index] = ureg_DECL_temporary(t->ureg); 4202 4203 return t->temps[index]; 4204 4205 case PROGRAM_OUTPUT: 4206 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) 4207 t->prevInstWrotePointSize = GL_TRUE; 4208 4209 if (t->procType == TGSI_PROCESSOR_VERTEX) 4210 assert(index < VERT_RESULT_MAX); 4211 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 4212 assert(index < FRAG_RESULT_MAX); 4213 else 4214 assert(index < GEOM_RESULT_MAX); 4215 4216 assert(t->outputMapping[index] < Elements(t->outputs)); 4217 4218 return t->outputs[t->outputMapping[index]]; 4219 4220 case PROGRAM_ADDRESS: 4221 return t->address[index]; 4222 4223 default: 4224 assert(!"unknown dst register file"); 4225 return ureg_dst_undef(); 4226 } 4227} 4228 4229/** 4230 * Map a glsl_to_tgsi src register to a TGSI ureg_src register. 
4231 */ 4232static struct ureg_src 4233src_register(struct st_translate *t, 4234 gl_register_file file, 4235 GLuint index) 4236{ 4237 switch(file) { 4238 case PROGRAM_UNDEFINED: 4239 return ureg_src_undef(); 4240 4241 case PROGRAM_TEMPORARY: 4242 assert(index >= 0); 4243 assert(index < Elements(t->temps)); 4244 if (ureg_dst_is_undef(t->temps[index])) 4245 t->temps[index] = ureg_DECL_temporary(t->ureg); 4246 return ureg_src(t->temps[index]); 4247 4248 case PROGRAM_NAMED_PARAM: 4249 case PROGRAM_ENV_PARAM: 4250 case PROGRAM_LOCAL_PARAM: 4251 case PROGRAM_UNIFORM: 4252 assert(index >= 0); 4253 return t->constants[index]; 4254 case PROGRAM_STATE_VAR: 4255 case PROGRAM_CONSTANT: /* ie, immediate */ 4256 if (index < 0) 4257 return ureg_DECL_constant(t->ureg, 0); 4258 else 4259 return t->constants[index]; 4260 4261 case PROGRAM_IMMEDIATE: 4262 return t->immediates[index]; 4263 4264 case PROGRAM_INPUT: 4265 assert(t->inputMapping[index] < Elements(t->inputs)); 4266 return t->inputs[t->inputMapping[index]]; 4267 4268 case PROGRAM_OUTPUT: 4269 assert(t->outputMapping[index] < Elements(t->outputs)); 4270 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 4271 4272 case PROGRAM_ADDRESS: 4273 return ureg_src(t->address[index]); 4274 4275 case PROGRAM_SYSTEM_VALUE: 4276 assert(index < Elements(t->systemValues)); 4277 return t->systemValues[index]; 4278 4279 default: 4280 assert(!"unknown src register file"); 4281 return ureg_src_undef(); 4282 } 4283} 4284 4285/** 4286 * Create a TGSI ureg_dst register from an st_dst_reg. 
4287 */ 4288static struct ureg_dst 4289translate_dst(struct st_translate *t, 4290 const st_dst_reg *dst_reg, 4291 bool saturate) 4292{ 4293 struct ureg_dst dst = dst_register(t, 4294 dst_reg->file, 4295 dst_reg->index); 4296 4297 dst = ureg_writemask(dst, dst_reg->writemask); 4298 4299 if (saturate) 4300 dst = ureg_saturate(dst); 4301 4302 if (dst_reg->reladdr != NULL) 4303 dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); 4304 4305 return dst; 4306} 4307 4308/** 4309 * Create a TGSI ureg_src register from an st_src_reg. 4310 */ 4311static struct ureg_src 4312translate_src(struct st_translate *t, const st_src_reg *src_reg) 4313{ 4314 struct ureg_src src = src_register(t, src_reg->file, src_reg->index); 4315 4316 src = ureg_swizzle(src, 4317 GET_SWZ(src_reg->swizzle, 0) & 0x3, 4318 GET_SWZ(src_reg->swizzle, 1) & 0x3, 4319 GET_SWZ(src_reg->swizzle, 2) & 0x3, 4320 GET_SWZ(src_reg->swizzle, 3) & 0x3); 4321 4322 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 4323 src = ureg_negate(src); 4324 4325 if (src_reg->reladdr != NULL) { 4326 /* Normally ureg_src_indirect() would be used here, but a stupid compiler 4327 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 4328 * set the bit for src.Negate. So we have to do the operation manually 4329 * here to work around the compiler's problems. */ 4330 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ 4331 struct ureg_src addr = ureg_src(t->address[0]); 4332 src.Indirect = 1; 4333 src.IndirectFile = addr.File; 4334 src.IndirectIndex = addr.Index; 4335 src.IndirectSwizzle = addr.SwizzleX; 4336 4337 if (src_reg->file != PROGRAM_INPUT && 4338 src_reg->file != PROGRAM_OUTPUT) { 4339 /* If src_reg->index was negative, it was set to zero in 4340 * src_register(). Reassign it now. But don't do this 4341 * for input/output regs since they get remapped while 4342 * const buffers don't. 
4343 */ 4344 src.Index = src_reg->index; 4345 } 4346 } 4347 4348 return src; 4349} 4350 4351static struct tgsi_texture_offset 4352translate_tex_offset(struct st_translate *t, 4353 const struct tgsi_texture_offset *in_offset) 4354{ 4355 struct tgsi_texture_offset offset; 4356 4357 assert(in_offset->File == PROGRAM_IMMEDIATE); 4358 4359 offset.File = TGSI_FILE_IMMEDIATE; 4360 offset.Index = in_offset->Index; 4361 offset.SwizzleX = in_offset->SwizzleX; 4362 offset.SwizzleY = in_offset->SwizzleY; 4363 offset.SwizzleZ = in_offset->SwizzleZ; 4364 4365 return offset; 4366} 4367 4368static void 4369compile_tgsi_instruction(struct st_translate *t, 4370 const glsl_to_tgsi_instruction *inst) 4371{ 4372 struct ureg_program *ureg = t->ureg; 4373 GLuint i; 4374 struct ureg_dst dst[1]; 4375 struct ureg_src src[4]; 4376 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; 4377 4378 unsigned num_dst; 4379 unsigned num_src; 4380 4381 num_dst = num_inst_dst_regs(inst->op); 4382 num_src = num_inst_src_regs(inst->op); 4383 4384 if (num_dst) 4385 dst[0] = translate_dst(t, 4386 &inst->dst, 4387 inst->saturate); 4388 4389 for (i = 0; i < num_src; i++) 4390 src[i] = translate_src(t, &inst->src[i]); 4391 4392 switch(inst->op) { 4393 case TGSI_OPCODE_BGNLOOP: 4394 case TGSI_OPCODE_CAL: 4395 case TGSI_OPCODE_ELSE: 4396 case TGSI_OPCODE_ENDLOOP: 4397 case TGSI_OPCODE_IF: 4398 assert(num_dst == 0); 4399 ureg_label_insn(ureg, 4400 inst->op, 4401 src, num_src, 4402 get_label(t, 4403 inst->op == TGSI_OPCODE_CAL ? 
inst->function->sig_id : 0)); 4404 return; 4405 4406 case TGSI_OPCODE_TEX: 4407 case TGSI_OPCODE_TXB: 4408 case TGSI_OPCODE_TXD: 4409 case TGSI_OPCODE_TXL: 4410 case TGSI_OPCODE_TXP: 4411 case TGSI_OPCODE_TXQ: 4412 case TGSI_OPCODE_TXF: 4413 src[num_src++] = t->samplers[inst->sampler]; 4414 for (i = 0; i < inst->tex_offset_num_offset; i++) { 4415 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); 4416 } 4417 ureg_tex_insn(ureg, 4418 inst->op, 4419 dst, num_dst, 4420 translate_texture_target(inst->tex_target, inst->tex_shadow), 4421 texoffsets, inst->tex_offset_num_offset, 4422 src, num_src); 4423 return; 4424 4425 case TGSI_OPCODE_SCS: 4426 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); 4427 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); 4428 break; 4429 4430 default: 4431 ureg_insn(ureg, 4432 inst->op, 4433 dst, num_dst, 4434 src, num_src); 4435 break; 4436 } 4437} 4438 4439/** 4440 * Emit the TGSI instructions to adjust the WPOS pixel center convention 4441 * Basically, add (adjX, adjY) to the fragment position. 4442 */ 4443static void 4444emit_adjusted_wpos(struct st_translate *t, 4445 const struct gl_program *program, 4446 float adjX, float adjY) 4447{ 4448 struct ureg_program *ureg = t->ureg; 4449 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 4450 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 4451 4452 /* Note that we bias X and Y and pass Z and W through unchanged. 4453 * The shader might also use gl_FragCoord.w and .z. 4454 */ 4455 ureg_ADD(ureg, wpos_temp, wpos_input, 4456 ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); 4457 4458 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 4459} 4460 4461 4462/** 4463 * Emit the TGSI instructions for inverting the WPOS y coordinate. 4464 * This code is unavoidable because it also depends on whether 4465 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 
4466 */ 4467static void 4468emit_wpos_inversion(struct st_translate *t, 4469 const struct gl_program *program, 4470 bool invert) 4471{ 4472 struct ureg_program *ureg = t->ureg; 4473 4474 /* Fragment program uses fragment position input. 4475 * Need to replace instances of INPUT[WPOS] with temp T 4476 * where T = INPUT[WPOS] by y is inverted. 4477 */ 4478 static const gl_state_index wposTransformState[STATE_LENGTH] 4479 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 4480 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4481 4482 /* XXX: note we are modifying the incoming shader here! Need to 4483 * do this before emitting the constant decls below, or this 4484 * will be missed: 4485 */ 4486 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 4487 wposTransformState); 4488 4489 struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); 4490 struct ureg_dst wpos_temp; 4491 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 4492 4493 /* MOV wpos_temp, input[wpos] 4494 */ 4495 if (wpos_input.File == TGSI_FILE_TEMPORARY) 4496 wpos_temp = ureg_dst(wpos_input); 4497 else { 4498 wpos_temp = ureg_DECL_temporary(ureg); 4499 ureg_MOV(ureg, wpos_temp, wpos_input); 4500 } 4501 4502 if (invert) { 4503 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 4504 */ 4505 ureg_MAD(ureg, 4506 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), 4507 wpos_input, 4508 ureg_scalar(wpostrans, 0), 4509 ureg_scalar(wpostrans, 1)); 4510 } else { 4511 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 4512 */ 4513 ureg_MAD(ureg, 4514 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), 4515 wpos_input, 4516 ureg_scalar(wpostrans, 2), 4517 ureg_scalar(wpostrans, 3)); 4518 } 4519 4520 /* Use wpos_temp as position input from here on: 4521 */ 4522 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 4523} 4524 4525 4526/** 4527 * Emit fragment position/ooordinate code. 
4528 */ 4529static void 4530emit_wpos(struct st_context *st, 4531 struct st_translate *t, 4532 const struct gl_program *program, 4533 struct ureg_program *ureg) 4534{ 4535 const struct gl_fragment_program *fp = 4536 (const struct gl_fragment_program *) program; 4537 struct pipe_screen *pscreen = st->pipe->screen; 4538 boolean invert = FALSE; 4539 4540 if (fp->OriginUpperLeft) { 4541 /* Fragment shader wants origin in upper-left */ 4542 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 4543 /* the driver supports upper-left origin */ 4544 } 4545 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 4546 /* the driver supports lower-left origin, need to invert Y */ 4547 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4548 invert = TRUE; 4549 } 4550 else 4551 assert(0); 4552 } 4553 else { 4554 /* Fragment shader wants origin in lower-left */ 4555 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 4556 /* the driver supports lower-left origin */ 4557 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4558 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 4559 /* the driver supports upper-left origin, need to invert Y */ 4560 invert = TRUE; 4561 else 4562 assert(0); 4563 } 4564 4565 if (fp->PixelCenterInteger) { 4566 /* Fragment shader wants pixel center integer */ 4567 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) 4568 /* the driver supports pixel center integer */ 4569 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4570 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) 4571 /* the driver supports pixel center half integer, need to bias X,Y */ 4572 emit_adjusted_wpos(t, program, 0.5f, invert ? 
0.5f : -0.5f); 4573 else 4574 assert(0); 4575 } 4576 else { 4577 /* Fragment shader wants pixel center half integer */ 4578 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4579 /* the driver supports pixel center half integer */ 4580 } 4581 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4582 /* the driver supports pixel center integer, need to bias X,Y */ 4583 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4584 emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); 4585 } 4586 else 4587 assert(0); 4588 } 4589 4590 /* we invert after adjustment so that we avoid the MOV to temporary, 4591 * and reuse the adjustment ADD instead */ 4592 emit_wpos_inversion(t, program, invert); 4593} 4594 4595/** 4596 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 4597 * TGSI uses +1 for front, -1 for back. 4598 * This function converts the TGSI value to the GL value. Simply clamping/ 4599 * saturating the value to [0,1] does the job. 
4600 */ 4601static void 4602emit_face_var(struct st_translate *t) 4603{ 4604 struct ureg_program *ureg = t->ureg; 4605 struct ureg_dst face_temp = ureg_DECL_temporary(ureg); 4606 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 4607 4608 /* MOV_SAT face_temp, input[face] */ 4609 face_temp = ureg_saturate(face_temp); 4610 ureg_MOV(ureg, face_temp, face_input); 4611 4612 /* Use face_temp as face input from here on: */ 4613 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 4614} 4615 4616static void 4617emit_edgeflags(struct st_translate *t) 4618{ 4619 struct ureg_program *ureg = t->ureg; 4620 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 4621 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 4622 4623 ureg_MOV(ureg, edge_dst, edge_src); 4624} 4625 4626/** 4627 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 4628 * \param program the program to translate 4629 * \param numInputs number of input registers used 4630 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 4631 * input indexes 4632 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 4633 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 4634 * each input 4635 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 4636 * \param numOutputs number of output registers used 4637 * \param outputMapping maps Mesa fragment program outputs to TGSI 4638 * generic outputs 4639 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 4640 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 4641 * each output 4642 * 4643 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 4644 */ 4645extern "C" enum pipe_error 4646st_translate_program( 4647 struct gl_context *ctx, 4648 uint procType, 4649 struct ureg_program *ureg, 4650 glsl_to_tgsi_visitor *program, 4651 const struct gl_program *proginfo, 4652 GLuint 
numInputs, 4653 const GLuint inputMapping[], 4654 const ubyte inputSemanticName[], 4655 const ubyte inputSemanticIndex[], 4656 const GLuint interpMode[], 4657 GLuint numOutputs, 4658 const GLuint outputMapping[], 4659 const ubyte outputSemanticName[], 4660 const ubyte outputSemanticIndex[], 4661 boolean passthrough_edgeflags) 4662{ 4663 struct st_translate translate, *t; 4664 unsigned i; 4665 enum pipe_error ret = PIPE_OK; 4666 4667 assert(numInputs <= Elements(t->inputs)); 4668 assert(numOutputs <= Elements(t->outputs)); 4669 4670 t = &translate; 4671 memset(t, 0, sizeof *t); 4672 4673 t->procType = procType; 4674 t->inputMapping = inputMapping; 4675 t->outputMapping = outputMapping; 4676 t->ureg = ureg; 4677 t->pointSizeOutIndex = -1; 4678 t->prevInstWrotePointSize = GL_FALSE; 4679 4680 /* 4681 * Declare input attributes. 4682 */ 4683 if (procType == TGSI_PROCESSOR_FRAGMENT) { 4684 for (i = 0; i < numInputs; i++) { 4685 t->inputs[i] = ureg_DECL_fs_input(ureg, 4686 inputSemanticName[i], 4687 inputSemanticIndex[i], 4688 interpMode[i]); 4689 } 4690 4691 if (proginfo->InputsRead & FRAG_BIT_WPOS) { 4692 /* Must do this after setting up t->inputs, and before 4693 * emitting constant references, below: 4694 */ 4695 emit_wpos(st_context(ctx), t, proginfo, ureg); 4696 } 4697 4698 if (proginfo->InputsRead & FRAG_BIT_FACE) 4699 emit_face_var(t); 4700 4701 /* 4702 * Declare output attributes. 
4703 */ 4704 for (i = 0; i < numOutputs; i++) { 4705 switch (outputSemanticName[i]) { 4706 case TGSI_SEMANTIC_POSITION: 4707 t->outputs[i] = ureg_DECL_output(ureg, 4708 TGSI_SEMANTIC_POSITION, /* Z/Depth */ 4709 outputSemanticIndex[i]); 4710 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); 4711 break; 4712 case TGSI_SEMANTIC_STENCIL: 4713 t->outputs[i] = ureg_DECL_output(ureg, 4714 TGSI_SEMANTIC_STENCIL, /* Stencil */ 4715 outputSemanticIndex[i]); 4716 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); 4717 break; 4718 case TGSI_SEMANTIC_COLOR: 4719 t->outputs[i] = ureg_DECL_output(ureg, 4720 TGSI_SEMANTIC_COLOR, 4721 outputSemanticIndex[i]); 4722 break; 4723 default: 4724 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); 4725 return PIPE_ERROR_BAD_INPUT; 4726 } 4727 } 4728 } 4729 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 4730 for (i = 0; i < numInputs; i++) { 4731 t->inputs[i] = ureg_DECL_gs_input(ureg, 4732 i, 4733 inputSemanticName[i], 4734 inputSemanticIndex[i]); 4735 } 4736 4737 for (i = 0; i < numOutputs; i++) { 4738 t->outputs[i] = ureg_DECL_output(ureg, 4739 outputSemanticName[i], 4740 outputSemanticIndex[i]); 4741 } 4742 } 4743 else { 4744 assert(procType == TGSI_PROCESSOR_VERTEX); 4745 4746 for (i = 0; i < numInputs; i++) { 4747 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 4748 } 4749 4750 for (i = 0; i < numOutputs; i++) { 4751 t->outputs[i] = ureg_DECL_output(ureg, 4752 outputSemanticName[i], 4753 outputSemanticIndex[i]); 4754 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { 4755 /* Writing to the point size result register requires special 4756 * handling to implement clamping. 4757 */ 4758 static const gl_state_index pointSizeClampState[STATE_LENGTH] 4759 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4760 /* XXX: note we are modifying the incoming shader here! 
Need to 4761 * do this before emitting the constant decls below, or this 4762 * will be missed. 4763 */ 4764 unsigned pointSizeClampConst = 4765 _mesa_add_state_reference(proginfo->Parameters, 4766 pointSizeClampState); 4767 struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); 4768 t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); 4769 t->pointSizeResult = t->outputs[i]; 4770 t->pointSizeOutIndex = i; 4771 t->outputs[i] = psizregtemp; 4772 } 4773 } 4774 if (passthrough_edgeflags) 4775 emit_edgeflags(t); 4776 } 4777 4778 /* Declare address register. 4779 */ 4780 if (program->num_address_regs > 0) { 4781 assert(program->num_address_regs == 1); 4782 t->address[0] = ureg_DECL_address(ureg); 4783 } 4784 4785 /* Declare misc input registers 4786 */ 4787 { 4788 GLbitfield sysInputs = proginfo->SystemValuesRead; 4789 unsigned numSys = 0; 4790 for (i = 0; sysInputs; i++) { 4791 if (sysInputs & (1 << i)) { 4792 unsigned semName = mesa_sysval_to_semantic[i]; 4793 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); 4794 numSys++; 4795 sysInputs &= ~(1 << i); 4796 } 4797 } 4798 } 4799 4800 if (program->indirect_addr_temps) { 4801 /* If temps are accessed with indirect addressing, declare temporaries 4802 * in sequential order. Else, we declare them on demand elsewhere. 4803 * (Note: the number of temporaries is equal to program->next_temp) 4804 */ 4805 for (i = 0; i < (unsigned)program->next_temp; i++) { 4806 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 4807 t->temps[i] = ureg_DECL_temporary(t->ureg); 4808 } 4809 } 4810 4811 /* Emit constants and uniforms. TGSI uses a single index space for these, 4812 * so we put all the translated regs in t->constants. 
4813 */ 4814 if (proginfo->Parameters) { 4815 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); 4816 if (t->constants == NULL) { 4817 ret = PIPE_ERROR_OUT_OF_MEMORY; 4818 goto out; 4819 } 4820 4821 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 4822 switch (proginfo->Parameters->Parameters[i].Type) { 4823 case PROGRAM_ENV_PARAM: 4824 case PROGRAM_LOCAL_PARAM: 4825 case PROGRAM_STATE_VAR: 4826 case PROGRAM_NAMED_PARAM: 4827 case PROGRAM_UNIFORM: 4828 t->constants[i] = ureg_DECL_constant(ureg, i); 4829 break; 4830 4831 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect 4832 * addressing of the const buffer. 4833 * FIXME: Be smarter and recognize param arrays: 4834 * indirect addressing is only valid within the referenced 4835 * array. 4836 */ 4837 case PROGRAM_CONSTANT: 4838 if (program->indirect_addr_consts) 4839 t->constants[i] = ureg_DECL_constant(ureg, i); 4840 else 4841 t->constants[i] = emit_immediate(t, 4842 proginfo->Parameters->ParameterValues[i], 4843 proginfo->Parameters->Parameters[i].DataType, 4844 4); 4845 break; 4846 default: 4847 break; 4848 } 4849 } 4850 } 4851 4852 /* Emit immediate values. 
4853 */ 4854 t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); 4855 if (t->immediates == NULL) { 4856 ret = PIPE_ERROR_OUT_OF_MEMORY; 4857 goto out; 4858 } 4859 i = 0; 4860 foreach_iter(exec_list_iterator, iter, program->immediates) { 4861 immediate_storage *imm = (immediate_storage *)iter.get(); 4862 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); 4863 } 4864 4865 /* texture samplers */ 4866 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 4867 if (program->samplers_used & (1 << i)) { 4868 t->samplers[i] = ureg_DECL_sampler(ureg, i); 4869 } 4870 } 4871 4872 /* Emit each instruction in turn: 4873 */ 4874 foreach_iter(exec_list_iterator, iter, program->instructions) { 4875 set_insn_start(t, ureg_get_instruction_number(ureg)); 4876 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); 4877 4878 if (t->prevInstWrotePointSize && proginfo->Id) { 4879 /* The previous instruction wrote to the (fake) vertex point size 4880 * result register. Now we need to clamp that value to the min/max 4881 * point size range, putting the result into the real point size 4882 * register. 4883 * Note that we can't do this easily at the end of program due to 4884 * possible early return. 
4885 */ 4886 set_insn_start(t, ureg_get_instruction_number(ureg)); 4887 ureg_MAX(t->ureg, 4888 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), 4889 ureg_src(t->outputs[t->pointSizeOutIndex]), 4890 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 4891 ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), 4892 ureg_src(t->outputs[t->pointSizeOutIndex]), 4893 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 4894 } 4895 t->prevInstWrotePointSize = GL_FALSE; 4896 } 4897 4898 /* Fix up all emitted labels: 4899 */ 4900 for (i = 0; i < t->labels_count; i++) { 4901 ureg_fixup_label(ureg, t->labels[i].token, 4902 t->insn[t->labels[i].branch_target]); 4903 } 4904 4905out: 4906 FREE(t->insn); 4907 FREE(t->labels); 4908 FREE(t->constants); 4909 FREE(t->immediates); 4910 4911 if (t->error) { 4912 debug_printf("%s: translate error flag set\n", __FUNCTION__); 4913 } 4914 4915 return ret; 4916} 4917/* ----------------------------- End TGSI code ------------------------------ */ 4918 4919/** 4920 * Convert a shader's GLSL IR into a Mesa gl_program, although without 4921 * generating Mesa IR. 
4922 */ 4923static struct gl_program * 4924get_mesa_program(struct gl_context *ctx, 4925 struct gl_shader_program *shader_program, 4926 struct gl_shader *shader) 4927{ 4928 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); 4929 struct gl_program *prog; 4930 GLenum target; 4931 const char *target_string; 4932 bool progress; 4933 struct gl_shader_compiler_options *options = 4934 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; 4935 4936 switch (shader->Type) { 4937 case GL_VERTEX_SHADER: 4938 target = GL_VERTEX_PROGRAM_ARB; 4939 target_string = "vertex"; 4940 break; 4941 case GL_FRAGMENT_SHADER: 4942 target = GL_FRAGMENT_PROGRAM_ARB; 4943 target_string = "fragment"; 4944 break; 4945 case GL_GEOMETRY_SHADER: 4946 target = GL_GEOMETRY_PROGRAM_NV; 4947 target_string = "geometry"; 4948 break; 4949 default: 4950 assert(!"should not be reached"); 4951 return NULL; 4952 } 4953 4954 validate_ir_tree(shader->ir); 4955 4956 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); 4957 if (!prog) 4958 return NULL; 4959 prog->Parameters = _mesa_new_parameter_list(); 4960 v->ctx = ctx; 4961 v->prog = prog; 4962 v->shader_program = shader_program; 4963 v->options = options; 4964 v->glsl_version = ctx->Const.GLSLVersion; 4965 v->native_integers = ctx->Const.NativeIntegers; 4966 4967 add_uniforms_to_parameters_list(shader_program, shader, prog); 4968 4969 /* Emit intermediate IR for main(). */ 4970 visit_exec_list(shader->ir, v); 4971 4972 /* Now emit bodies for any functions that were used. 
*/ 4973 do { 4974 progress = GL_FALSE; 4975 4976 foreach_iter(exec_list_iterator, iter, v->function_signatures) { 4977 function_entry *entry = (function_entry *)iter.get(); 4978 4979 if (!entry->bgn_inst) { 4980 v->current_function = entry; 4981 4982 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); 4983 entry->bgn_inst->function = entry; 4984 4985 visit_exec_list(&entry->sig->body, v); 4986 4987 glsl_to_tgsi_instruction *last; 4988 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); 4989 if (last->op != TGSI_OPCODE_RET) 4990 v->emit(NULL, TGSI_OPCODE_RET); 4991 4992 glsl_to_tgsi_instruction *end; 4993 end = v->emit(NULL, TGSI_OPCODE_ENDSUB); 4994 end->function = entry; 4995 4996 progress = GL_TRUE; 4997 } 4998 } 4999 } while (progress); 5000 5001#if 0 5002 /* Print out some information (for debugging purposes) used by the 5003 * optimization passes. */ 5004 for (i=0; i < v->next_temp; i++) { 5005 int fr = v->get_first_temp_read(i); 5006 int fw = v->get_first_temp_write(i); 5007 int lr = v->get_last_temp_read(i); 5008 int lw = v->get_last_temp_write(i); 5009 5010 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); 5011 assert(fw <= fr); 5012 } 5013#endif 5014 5015 /* Remove reads to output registers, and to varyings in vertex shaders. */ 5016 v->remove_output_reads(PROGRAM_OUTPUT); 5017 if (target == GL_VERTEX_PROGRAM_ARB) 5018 v->remove_output_reads(PROGRAM_VARYING); 5019 5020 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ 5021 v->simplify_cmp(); 5022 v->copy_propagate(); 5023 while (v->eliminate_dead_code_advanced()); 5024 5025 /* FIXME: These passes to optimize temporary registers don't work when there 5026 * is indirect addressing of the temporary register space. We need proper 5027 * array support so that we don't have to give up these passes in every 5028 * shader that uses arrays. 
5029 */ 5030 if (!v->indirect_addr_temps) { 5031 v->eliminate_dead_code(); 5032 v->merge_registers(); 5033 v->renumber_registers(); 5034 } 5035 5036 /* Write the END instruction. */ 5037 v->emit(NULL, TGSI_OPCODE_END); 5038 5039 if (ctx->Shader.Flags & GLSL_DUMP) { 5040 printf("\n"); 5041 printf("GLSL IR for linked %s program %d:\n", target_string, 5042 shader_program->Name); 5043 _mesa_print_ir(shader->ir, NULL); 5044 printf("\n"); 5045 printf("\n"); 5046 } 5047 5048 prog->Instructions = NULL; 5049 prog->NumInstructions = 0; 5050 5051 do_set_program_inouts(shader->ir, prog); 5052 count_resources(v, prog); 5053 5054 check_resources(ctx, shader_program, v, prog); 5055 5056 _mesa_reference_program(ctx, &shader->Program, prog); 5057 5058 struct st_vertex_program *stvp; 5059 struct st_fragment_program *stfp; 5060 struct st_geometry_program *stgp; 5061 5062 switch (shader->Type) { 5063 case GL_VERTEX_SHADER: 5064 stvp = (struct st_vertex_program *)prog; 5065 stvp->glsl_to_tgsi = v; 5066 break; 5067 case GL_FRAGMENT_SHADER: 5068 stfp = (struct st_fragment_program *)prog; 5069 stfp->glsl_to_tgsi = v; 5070 break; 5071 case GL_GEOMETRY_SHADER: 5072 stgp = (struct st_geometry_program *)prog; 5073 stgp->glsl_to_tgsi = v; 5074 break; 5075 default: 5076 assert(!"should not be reached"); 5077 return NULL; 5078 } 5079 5080 return prog; 5081} 5082 5083extern "C" { 5084 5085struct gl_shader * 5086st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 5087{ 5088 struct gl_shader *shader; 5089 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || 5090 type == GL_GEOMETRY_SHADER_ARB); 5091 shader = rzalloc(NULL, struct gl_shader); 5092 if (shader) { 5093 shader->Type = type; 5094 shader->Name = name; 5095 _mesa_init_shader(ctx, shader); 5096 } 5097 return shader; 5098} 5099 5100struct gl_shader_program * 5101st_new_shader_program(struct gl_context *ctx, GLuint name) 5102{ 5103 struct gl_shader_program *shProg; 5104 shProg = rzalloc(NULL, struct gl_shader_program); 
5105 if (shProg) { 5106 shProg->Name = name; 5107 _mesa_init_shader_program(ctx, shProg); 5108 } 5109 return shProg; 5110} 5111 5112/** 5113 * Link a shader. 5114 * Called via ctx->Driver.LinkShader() 5115 * This actually involves converting GLSL IR into an intermediate TGSI-like IR 5116 * with code lowering and other optimizations. 5117 */ 5118GLboolean 5119st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 5120{ 5121 assert(prog->LinkStatus); 5122 5123 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 5124 if (prog->_LinkedShaders[i] == NULL) 5125 continue; 5126 5127 bool progress; 5128 exec_list *ir = prog->_LinkedShaders[i]->ir; 5129 const struct gl_shader_compiler_options *options = 5130 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; 5131 5132 do { 5133 progress = false; 5134 5135 /* Lowering */ 5136 do_mat_op_to_vec(ir); 5137 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 5138 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP 5139 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); 5140 5141 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; 5142 5143 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; 5144 5145 progress = lower_quadop_vector(ir, false) || progress; 5146 5147 if (options->MaxIfDepth == 0) 5148 progress = lower_discard(ir) || progress; 5149 5150 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; 5151 5152 if (options->EmitNoNoise) 5153 progress = lower_noise(ir) || progress; 5154 5155 /* If there are forms of indirect addressing that the driver 5156 * cannot handle, perform the lowering pass. 
5157 */ 5158 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 5159 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 5160 progress = 5161 lower_variable_index_to_cond_assign(ir, 5162 options->EmitNoIndirectInput, 5163 options->EmitNoIndirectOutput, 5164 options->EmitNoIndirectTemp, 5165 options->EmitNoIndirectUniform) 5166 || progress; 5167 5168 progress = do_vec_index_to_cond_assign(ir) || progress; 5169 } while (progress); 5170 5171 validate_ir_tree(ir); 5172 } 5173 5174 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 5175 struct gl_program *linked_prog; 5176 5177 if (prog->_LinkedShaders[i] == NULL) 5178 continue; 5179 5180 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); 5181 5182 if (linked_prog) { 5183 bool ok = true; 5184 5185 switch (prog->_LinkedShaders[i]->Type) { 5186 case GL_VERTEX_SHADER: 5187 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 5188 linked_prog); 5189 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, 5190 linked_prog); 5191 if (!ok) { 5192 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); 5193 } 5194 break; 5195 case GL_FRAGMENT_SHADER: 5196 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, 5197 (struct gl_fragment_program *)linked_prog); 5198 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, 5199 linked_prog); 5200 if (!ok) { 5201 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); 5202 } 5203 break; 5204 case GL_GEOMETRY_SHADER: 5205 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 5206 linked_prog); 5207 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, 5208 linked_prog); 5209 if (!ok) { 5210 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); 5211 } 5212 break; 5213 } 5214 if (!ok) { 5215 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, NULL); 5216 _mesa_reference_program(ctx, &linked_prog, NULL); 5217 return GL_FALSE; 5218 } 5219 } 5220 5221 
_mesa_reference_program(ctx, &linked_prog, NULL); 5222 } 5223 5224 return GL_TRUE; 5225} 5226 5227} /* extern "C" */ 5228