st_glsl_to_tgsi.cpp revision fc7ac4da7dfb64aa192ef8cff44cb762beace4c1
1/* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright © 2010 Intel Corporation 5 * Copyright © 2011 Bryan Cain 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 */ 26 27/** 28 * \file glsl_to_tgsi.cpp 29 * 30 * Translate GLSL IR to TGSI. 
31 */ 32 33#include <stdio.h> 34#include "main/compiler.h" 35#include "ir.h" 36#include "ir_visitor.h" 37#include "ir_print_visitor.h" 38#include "ir_expression_flattening.h" 39#include "glsl_types.h" 40#include "glsl_parser_extras.h" 41#include "../glsl/program.h" 42#include "ir_optimization.h" 43#include "ast.h" 44 45#include "main/mtypes.h" 46#include "main/shaderobj.h" 47#include "program/hash_table.h" 48 49extern "C" { 50#include "main/shaderapi.h" 51#include "main/uniforms.h" 52#include "program/prog_instruction.h" 53#include "program/prog_optimize.h" 54#include "program/prog_print.h" 55#include "program/program.h" 56#include "program/prog_parameter.h" 57#include "program/sampler.h" 58 59#include "pipe/p_compiler.h" 60#include "pipe/p_context.h" 61#include "pipe/p_screen.h" 62#include "pipe/p_shader_tokens.h" 63#include "pipe/p_state.h" 64#include "util/u_math.h" 65#include "tgsi/tgsi_ureg.h" 66#include "tgsi/tgsi_info.h" 67#include "st_context.h" 68#include "st_program.h" 69#include "st_glsl_to_tgsi.h" 70#include "st_mesa_to_tgsi.h" 71} 72 73#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX 74#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 75 (1 << PROGRAM_ENV_PARAM) | \ 76 (1 << PROGRAM_STATE_VAR) | \ 77 (1 << PROGRAM_NAMED_PARAM) | \ 78 (1 << PROGRAM_CONSTANT) | \ 79 (1 << PROGRAM_UNIFORM)) 80 81/** 82 * Maximum number of temporary registers. 83 * 84 * It is too big for stack allocated arrays -- it will cause stack overflow on 85 * Windows and likely Mac OS X. 86 */ 87#define MAX_TEMPS 4096 88 89/* will be 4 for GLSL 4.00 */ 90#define MAX_GLSL_TEXTURE_OFFSET 1 91 92class st_src_reg; 93class st_dst_reg; 94 95static int swizzle_for_size(int size); 96 97/** 98 * This struct is a corresponding struct to TGSI ureg_src. 
99 */ 100class st_src_reg { 101public: 102 st_src_reg(gl_register_file file, int index, const glsl_type *type) 103 { 104 this->file = file; 105 this->index = index; 106 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 107 this->swizzle = swizzle_for_size(type->vector_elements); 108 else 109 this->swizzle = SWIZZLE_XYZW; 110 this->negate = 0; 111 this->type = type ? type->base_type : GLSL_TYPE_ERROR; 112 this->reladdr = NULL; 113 } 114 115 st_src_reg(gl_register_file file, int index, int type) 116 { 117 this->type = type; 118 this->file = file; 119 this->index = index; 120 this->swizzle = SWIZZLE_XYZW; 121 this->negate = 0; 122 this->reladdr = NULL; 123 } 124 125 st_src_reg() 126 { 127 this->type = GLSL_TYPE_ERROR; 128 this->file = PROGRAM_UNDEFINED; 129 this->index = 0; 130 this->swizzle = 0; 131 this->negate = 0; 132 this->reladdr = NULL; 133 } 134 135 explicit st_src_reg(st_dst_reg reg); 136 137 gl_register_file file; /**< PROGRAM_* from Mesa */ 138 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 139 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 140 int negate; /**< NEGATE_XYZW mask from mesa */ 141 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 142 /** Register index should be offset by the integer in this reg. 
*/ 143 st_src_reg *reladdr; 144}; 145 146class st_dst_reg { 147public: 148 st_dst_reg(gl_register_file file, int writemask, int type) 149 { 150 this->file = file; 151 this->index = 0; 152 this->writemask = writemask; 153 this->cond_mask = COND_TR; 154 this->reladdr = NULL; 155 this->type = type; 156 } 157 158 st_dst_reg() 159 { 160 this->type = GLSL_TYPE_ERROR; 161 this->file = PROGRAM_UNDEFINED; 162 this->index = 0; 163 this->writemask = 0; 164 this->cond_mask = COND_TR; 165 this->reladdr = NULL; 166 } 167 168 explicit st_dst_reg(st_src_reg reg); 169 170 gl_register_file file; /**< PROGRAM_* from Mesa */ 171 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 172 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 173 GLuint cond_mask:4; 174 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 175 /** Register index should be offset by the integer in this reg. */ 176 st_src_reg *reladdr; 177}; 178 179st_src_reg::st_src_reg(st_dst_reg reg) 180{ 181 this->type = reg.type; 182 this->file = reg.file; 183 this->index = reg.index; 184 this->swizzle = SWIZZLE_XYZW; 185 this->negate = 0; 186 this->reladdr = reg.reladdr; 187} 188 189st_dst_reg::st_dst_reg(st_src_reg reg) 190{ 191 this->type = reg.type; 192 this->file = reg.file; 193 this->index = reg.index; 194 this->writemask = WRITEMASK_XYZW; 195 this->cond_mask = COND_TR; 196 this->reladdr = reg.reladdr; 197} 198 199class glsl_to_tgsi_instruction : public exec_node { 200public: 201 /* Callers of this ralloc-based new need not call delete. It's 202 * easier to just ralloc_free 'ctx' (or any of its ancestors). 
*/ 203 static void* operator new(size_t size, void *ctx) 204 { 205 void *node; 206 207 node = rzalloc_size(ctx, size); 208 assert(node != NULL); 209 210 return node; 211 } 212 213 unsigned op; 214 st_dst_reg dst; 215 st_src_reg src[3]; 216 /** Pointer to the ir source this tree came from for debugging */ 217 ir_instruction *ir; 218 GLboolean cond_update; 219 bool saturate; 220 int sampler; /**< sampler index */ 221 int tex_target; /**< One of TEXTURE_*_INDEX */ 222 GLboolean tex_shadow; 223 struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; 224 unsigned tex_offset_num_offset; 225 int dead_mask; /**< Used in dead code elimination */ 226 227 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ 228}; 229 230class variable_storage : public exec_node { 231public: 232 variable_storage(ir_variable *var, gl_register_file file, int index) 233 : file(file), index(index), var(var) 234 { 235 /* empty */ 236 } 237 238 gl_register_file file; 239 int index; 240 ir_variable *var; /* variable that maps to this, if any */ 241}; 242 243class immediate_storage : public exec_node { 244public: 245 immediate_storage(gl_constant_value *values, int size, int type) 246 { 247 memcpy(this->values, values, size * sizeof(gl_constant_value)); 248 this->size = size; 249 this->type = type; 250 } 251 252 gl_constant_value values[4]; 253 int size; /**< Number of components (1-4) */ 254 int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ 255}; 256 257class function_entry : public exec_node { 258public: 259 ir_function_signature *sig; 260 261 /** 262 * identifier of this function signature used by the program. 263 * 264 * At the point that TGSI instructions for function calls are 265 * generated, we don't know the address of the first instruction of 266 * the function body. So we make the BranchTarget that is called a 267 * small integer and rewrite them during set_branchtargets(). 
268 */ 269 int sig_id; 270 271 /** 272 * Pointer to first instruction of the function body. 273 * 274 * Set during function body emits after main() is processed. 275 */ 276 glsl_to_tgsi_instruction *bgn_inst; 277 278 /** 279 * Index of the first instruction of the function body in actual TGSI. 280 * 281 * Set after conversion from glsl_to_tgsi_instruction to TGSI. 282 */ 283 int inst; 284 285 /** Storage for the return value. */ 286 st_src_reg return_reg; 287}; 288 289class glsl_to_tgsi_visitor : public ir_visitor { 290public: 291 glsl_to_tgsi_visitor(); 292 ~glsl_to_tgsi_visitor(); 293 294 function_entry *current_function; 295 296 struct gl_context *ctx; 297 struct gl_program *prog; 298 struct gl_shader_program *shader_program; 299 struct gl_shader_compiler_options *options; 300 301 int next_temp; 302 303 int num_address_regs; 304 int samplers_used; 305 bool indirect_addr_temps; 306 bool indirect_addr_consts; 307 308 int glsl_version; 309 bool native_integers; 310 311 variable_storage *find_variable_storage(ir_variable *var); 312 313 int add_constant(gl_register_file file, gl_constant_value values[4], 314 int size, int datatype, GLuint *swizzle_out); 315 316 function_entry *get_function_signature(ir_function_signature *sig); 317 318 st_src_reg get_temp(const glsl_type *type); 319 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); 320 321 st_src_reg st_src_reg_for_float(float val); 322 st_src_reg st_src_reg_for_int(int val); 323 st_src_reg st_src_reg_for_type(int type, int val); 324 325 /** 326 * \name Visit methods 327 * 328 * As typical for the visitor pattern, there must be one \c visit method for 329 * each concrete subclass of \c ir_instruction. Virtual base classes within 330 * the hierarchy should not have \c visit methods. 
331 */ 332 /*@{*/ 333 virtual void visit(ir_variable *); 334 virtual void visit(ir_loop *); 335 virtual void visit(ir_loop_jump *); 336 virtual void visit(ir_function_signature *); 337 virtual void visit(ir_function *); 338 virtual void visit(ir_expression *); 339 virtual void visit(ir_swizzle *); 340 virtual void visit(ir_dereference_variable *); 341 virtual void visit(ir_dereference_array *); 342 virtual void visit(ir_dereference_record *); 343 virtual void visit(ir_assignment *); 344 virtual void visit(ir_constant *); 345 virtual void visit(ir_call *); 346 virtual void visit(ir_return *); 347 virtual void visit(ir_discard *); 348 virtual void visit(ir_texture *); 349 virtual void visit(ir_if *); 350 /*@}*/ 351 352 st_src_reg result; 353 354 /** List of variable_storage */ 355 exec_list variables; 356 357 /** List of immediate_storage */ 358 exec_list immediates; 359 int num_immediates; 360 361 /** List of function_entry */ 362 exec_list function_signatures; 363 int next_signature_id; 364 365 /** List of glsl_to_tgsi_instruction */ 366 exec_list instructions; 367 368 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); 369 370 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 371 st_dst_reg dst, st_src_reg src0); 372 373 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 374 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 375 376 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 377 st_dst_reg dst, 378 st_src_reg src0, st_src_reg src1, st_src_reg src2); 379 380 unsigned get_opcode(ir_instruction *ir, unsigned op, 381 st_dst_reg dst, 382 st_src_reg src0, st_src_reg src1); 383 384 /** 385 * Emit the correct dot-product instruction for the type of arguments 386 */ 387 glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, 388 st_dst_reg dst, 389 st_src_reg src0, 390 st_src_reg src1, 391 unsigned elements); 392 393 void emit_scalar(ir_instruction *ir, unsigned op, 394 st_dst_reg dst, st_src_reg src0); 395 396 
void emit_scalar(ir_instruction *ir, unsigned op, 397 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 398 399 void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst); 400 401 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); 402 403 void emit_scs(ir_instruction *ir, unsigned op, 404 st_dst_reg dst, const st_src_reg &src); 405 406 bool try_emit_mad(ir_expression *ir, 407 int mul_operand); 408 bool try_emit_mad_for_and_not(ir_expression *ir, 409 int mul_operand); 410 bool try_emit_sat(ir_expression *ir); 411 412 void emit_swz(ir_expression *ir); 413 414 bool process_move_condition(ir_rvalue *ir); 415 416 void remove_output_reads(gl_register_file type); 417 void simplify_cmp(void); 418 419 void rename_temp_register(int index, int new_index); 420 int get_first_temp_read(int index); 421 int get_first_temp_write(int index); 422 int get_last_temp_read(int index); 423 int get_last_temp_write(int index); 424 425 void copy_propagate(void); 426 void eliminate_dead_code(void); 427 int eliminate_dead_code_advanced(void); 428 void merge_registers(void); 429 void renumber_registers(void); 430 431 void *mem_ctx; 432}; 433 434static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); 435 436static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); 437 438static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); 439 440static void 441fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); 442 443static void 444fail_link(struct gl_shader_program *prog, const char *fmt, ...) 
445{ 446 va_list args; 447 va_start(args, fmt); 448 ralloc_vasprintf_append(&prog->InfoLog, fmt, args); 449 va_end(args); 450 451 prog->LinkStatus = GL_FALSE; 452} 453 454static int 455swizzle_for_size(int size) 456{ 457 int size_swizzles[4] = { 458 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 459 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 460 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 461 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 462 }; 463 464 assert((size >= 1) && (size <= 4)); 465 return size_swizzles[size - 1]; 466} 467 468static bool 469is_tex_instruction(unsigned opcode) 470{ 471 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 472 return info->is_tex; 473} 474 475static unsigned 476num_inst_dst_regs(unsigned opcode) 477{ 478 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 479 return info->num_dst; 480} 481 482static unsigned 483num_inst_src_regs(unsigned opcode) 484{ 485 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 486 return info->is_tex ? info->num_src - 1 : info->num_src; 487} 488 489glsl_to_tgsi_instruction * 490glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 491 st_dst_reg dst, 492 st_src_reg src0, st_src_reg src1, st_src_reg src2) 493{ 494 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); 495 int num_reladdr = 0, i; 496 497 op = get_opcode(ir, op, dst, src0, src1); 498 499 /* If we have to do relative addressing, we want to load the ARL 500 * reg directly for one of the regs, and preload the other reladdr 501 * sources into temps. 
502 */ 503 num_reladdr += dst.reladdr != NULL; 504 num_reladdr += src0.reladdr != NULL; 505 num_reladdr += src1.reladdr != NULL; 506 num_reladdr += src2.reladdr != NULL; 507 508 reladdr_to_temp(ir, &src2, &num_reladdr); 509 reladdr_to_temp(ir, &src1, &num_reladdr); 510 reladdr_to_temp(ir, &src0, &num_reladdr); 511 512 if (dst.reladdr) { 513 emit_arl(ir, address_reg, *dst.reladdr); 514 num_reladdr--; 515 } 516 assert(num_reladdr == 0); 517 518 inst->op = op; 519 inst->dst = dst; 520 inst->src[0] = src0; 521 inst->src[1] = src1; 522 inst->src[2] = src2; 523 inst->ir = ir; 524 inst->dead_mask = 0; 525 526 inst->function = NULL; 527 528 if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL) 529 this->num_address_regs = 1; 530 531 /* Update indirect addressing status used by TGSI */ 532 if (dst.reladdr) { 533 switch(dst.file) { 534 case PROGRAM_TEMPORARY: 535 this->indirect_addr_temps = true; 536 break; 537 case PROGRAM_LOCAL_PARAM: 538 case PROGRAM_ENV_PARAM: 539 case PROGRAM_STATE_VAR: 540 case PROGRAM_NAMED_PARAM: 541 case PROGRAM_CONSTANT: 542 case PROGRAM_UNIFORM: 543 this->indirect_addr_consts = true; 544 break; 545 case PROGRAM_IMMEDIATE: 546 assert(!"immediates should not have indirect addressing"); 547 break; 548 default: 549 break; 550 } 551 } 552 else { 553 for (i=0; i<3; i++) { 554 if(inst->src[i].reladdr) { 555 switch(inst->src[i].file) { 556 case PROGRAM_TEMPORARY: 557 this->indirect_addr_temps = true; 558 break; 559 case PROGRAM_LOCAL_PARAM: 560 case PROGRAM_ENV_PARAM: 561 case PROGRAM_STATE_VAR: 562 case PROGRAM_NAMED_PARAM: 563 case PROGRAM_CONSTANT: 564 case PROGRAM_UNIFORM: 565 this->indirect_addr_consts = true; 566 break; 567 case PROGRAM_IMMEDIATE: 568 assert(!"immediates should not have indirect addressing"); 569 break; 570 default: 571 break; 572 } 573 } 574 } 575 } 576 577 this->instructions.push_tail(inst); 578 579 if (native_integers) 580 try_emit_float_set(ir, op, dst); 581 582 return inst; 583} 584 585 586glsl_to_tgsi_instruction * 
587glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 588 st_dst_reg dst, st_src_reg src0, st_src_reg src1) 589{ 590 return emit(ir, op, dst, src0, src1, undef_src); 591} 592 593glsl_to_tgsi_instruction * 594glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 595 st_dst_reg dst, st_src_reg src0) 596{ 597 assert(dst.writemask != 0); 598 return emit(ir, op, dst, src0, undef_src, undef_src); 599} 600 601glsl_to_tgsi_instruction * 602glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) 603{ 604 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 605} 606 607 /** 608 * Emits the code to convert the result of float SET instructions to integers. 609 */ 610void 611glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op, 612 st_dst_reg dst) 613{ 614 if ((op == TGSI_OPCODE_SEQ || 615 op == TGSI_OPCODE_SNE || 616 op == TGSI_OPCODE_SGE || 617 op == TGSI_OPCODE_SLT)) 618 { 619 st_src_reg src = st_src_reg(dst); 620 src.negate = ~src.negate; 621 dst.type = GLSL_TYPE_FLOAT; 622 emit(ir, TGSI_OPCODE_F2I, dst, src); 623 } 624} 625 626/** 627 * Determines whether to use an integer, unsigned integer, or float opcode 628 * based on the operands and input opcode, then emits the result. 629 */ 630unsigned 631glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, 632 st_dst_reg dst, 633 st_src_reg src0, st_src_reg src1) 634{ 635 int type = GLSL_TYPE_FLOAT; 636 637 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) 638 type = GLSL_TYPE_FLOAT; 639 else if (native_integers) 640 type = src0.type == GLSL_TYPE_BOOL ? 
GLSL_TYPE_INT : src0.type; 641 642#define case4(c, f, i, u) \ 643 case TGSI_OPCODE_##c: \ 644 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ 645 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ 646 else op = TGSI_OPCODE_##f; \ 647 break; 648#define case3(f, i, u) case4(f, f, i, u) 649#define case2fi(f, i) case4(f, f, i, i) 650#define case2iu(i, u) case4(i, LAST, i, u) 651 652 switch(op) { 653 case2fi(ADD, UADD); 654 case2fi(MUL, UMUL); 655 case2fi(MAD, UMAD); 656 case3(DIV, IDIV, UDIV); 657 case3(MAX, IMAX, UMAX); 658 case3(MIN, IMIN, UMIN); 659 case2iu(MOD, UMOD); 660 661 case2fi(SEQ, USEQ); 662 case2fi(SNE, USNE); 663 case3(SGE, ISGE, USGE); 664 case3(SLT, ISLT, USLT); 665 666 case2iu(ISHR, USHR); 667 668 default: break; 669 } 670 671 assert(op != TGSI_OPCODE_LAST); 672 return op; 673} 674 675glsl_to_tgsi_instruction * 676glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, 677 st_dst_reg dst, st_src_reg src0, st_src_reg src1, 678 unsigned elements) 679{ 680 static const unsigned dot_opcodes[] = { 681 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 682 }; 683 684 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 685} 686 687/** 688 * Emits TGSI scalar opcodes to produce unique answers across channels. 689 * 690 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X 691 * channel determines the result across all channels. So to do a vec4 692 * of this operation, we want to emit a scalar per source channel used 693 * to produce dest channels. 694 */ 695void 696glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 697 st_dst_reg dst, 698 st_src_reg orig_src0, st_src_reg orig_src1) 699{ 700 int i, j; 701 int done_mask = ~dst.writemask; 702 703 /* TGSI RCP is a scalar operation splatting results to all channels, 704 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 705 * dst channels. 
706 */ 707 for (i = 0; i < 4; i++) { 708 GLuint this_mask = (1 << i); 709 glsl_to_tgsi_instruction *inst; 710 st_src_reg src0 = orig_src0; 711 st_src_reg src1 = orig_src1; 712 713 if (done_mask & this_mask) 714 continue; 715 716 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 717 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 718 for (j = i + 1; j < 4; j++) { 719 /* If there is another enabled component in the destination that is 720 * derived from the same inputs, generate its value on this pass as 721 * well. 722 */ 723 if (!(done_mask & (1 << j)) && 724 GET_SWZ(src0.swizzle, j) == src0_swiz && 725 GET_SWZ(src1.swizzle, j) == src1_swiz) { 726 this_mask |= (1 << j); 727 } 728 } 729 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 730 src0_swiz, src0_swiz); 731 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 732 src1_swiz, src1_swiz); 733 734 inst = emit(ir, op, dst, src0, src1); 735 inst->dst.writemask = this_mask; 736 done_mask |= this_mask; 737 } 738} 739 740void 741glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 742 st_dst_reg dst, st_src_reg src0) 743{ 744 st_src_reg undef = undef_src; 745 746 undef.swizzle = SWIZZLE_XXXX; 747 748 emit_scalar(ir, op, dst, src0, undef); 749} 750 751void 752glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 753 st_dst_reg dst, st_src_reg src0) 754{ 755 int op = TGSI_OPCODE_ARL; 756 757 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) 758 op = TGSI_OPCODE_UARL; 759 760 emit(NULL, op, dst, src0); 761} 762 763/** 764 * Emit an TGSI_OPCODE_SCS instruction 765 * 766 * The \c SCS opcode functions a bit differently than the other TGSI opcodes. 767 * Instead of splatting its result across all four components of the 768 * destination, it writes one value to the \c x component and another value to 769 * the \c y component. 770 * 771 * \param ir IR instruction being processed 772 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 773 * on which value is desired. 
774 * \param dst Destination register 775 * \param src Source register 776 */ 777void 778glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, 779 st_dst_reg dst, 780 const st_src_reg &src) 781{ 782 /* Vertex programs cannot use the SCS opcode. 783 */ 784 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { 785 emit_scalar(ir, op, dst, src); 786 return; 787 } 788 789 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; 790 const unsigned scs_mask = (1U << component); 791 int done_mask = ~dst.writemask; 792 st_src_reg tmp; 793 794 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); 795 796 /* If there are compnents in the destination that differ from the component 797 * that will be written by the SCS instrution, we'll need a temporary. 798 */ 799 if (scs_mask != unsigned(dst.writemask)) { 800 tmp = get_temp(glsl_type::vec4_type); 801 } 802 803 for (unsigned i = 0; i < 4; i++) { 804 unsigned this_mask = (1U << i); 805 st_src_reg src0 = src; 806 807 if ((done_mask & this_mask) != 0) 808 continue; 809 810 /* The source swizzle specified which component of the source generates 811 * sine / cosine for the current component in the destination. The SCS 812 * instruction requires that this value be swizzle to the X component. 813 * Replace the current swizzle with a swizzle that puts the source in 814 * the X component. 815 */ 816 unsigned src0_swiz = GET_SWZ(src.swizzle, i); 817 818 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 819 src0_swiz, src0_swiz); 820 for (unsigned j = i + 1; j < 4; j++) { 821 /* If there is another enabled component in the destination that is 822 * derived from the same inputs, generate its value on this pass as 823 * well. 824 */ 825 if (!(done_mask & (1 << j)) && 826 GET_SWZ(src0.swizzle, j) == src0_swiz) { 827 this_mask |= (1 << j); 828 } 829 } 830 831 if (this_mask != scs_mask) { 832 glsl_to_tgsi_instruction *inst; 833 st_dst_reg tmp_dst = st_dst_reg(tmp); 834 835 /* Emit the SCS instruction. 
836 */ 837 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); 838 inst->dst.writemask = scs_mask; 839 840 /* Move the result of the SCS instruction to the desired location in 841 * the destination. 842 */ 843 tmp.swizzle = MAKE_SWIZZLE4(component, component, 844 component, component); 845 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); 846 inst->dst.writemask = this_mask; 847 } else { 848 /* Emit the SCS instruction to write directly to the destination. 849 */ 850 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); 851 inst->dst.writemask = scs_mask; 852 } 853 854 done_mask |= this_mask; 855 } 856} 857 858int 859glsl_to_tgsi_visitor::add_constant(gl_register_file file, 860 gl_constant_value values[4], int size, int datatype, 861 GLuint *swizzle_out) 862{ 863 if (file == PROGRAM_CONSTANT) { 864 return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, 865 size, datatype, swizzle_out); 866 } else { 867 int index = 0; 868 immediate_storage *entry; 869 assert(file == PROGRAM_IMMEDIATE); 870 871 /* Search immediate storage to see if we already have an identical 872 * immediate that we can use instead of adding a duplicate entry. 873 */ 874 foreach_iter(exec_list_iterator, iter, this->immediates) { 875 entry = (immediate_storage *)iter.get(); 876 877 if (entry->size == size && 878 entry->type == datatype && 879 !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { 880 return index; 881 } 882 index++; 883 } 884 885 /* Add this immediate to the list. 
*/ 886 entry = new(mem_ctx) immediate_storage(values, size, datatype); 887 this->immediates.push_tail(entry); 888 this->num_immediates++; 889 return index; 890 } 891} 892 893st_src_reg 894glsl_to_tgsi_visitor::st_src_reg_for_float(float val) 895{ 896 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); 897 union gl_constant_value uval; 898 899 uval.f = val; 900 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); 901 902 return src; 903} 904 905st_src_reg 906glsl_to_tgsi_visitor::st_src_reg_for_int(int val) 907{ 908 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); 909 union gl_constant_value uval; 910 911 assert(native_integers); 912 913 uval.i = val; 914 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); 915 916 return src; 917} 918 919st_src_reg 920glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) 921{ 922 if (native_integers) 923 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 924 st_src_reg_for_int(val); 925 else 926 return st_src_reg_for_float(val); 927} 928 929static int 930type_size(const struct glsl_type *type) 931{ 932 unsigned int i; 933 int size; 934 935 switch (type->base_type) { 936 case GLSL_TYPE_UINT: 937 case GLSL_TYPE_INT: 938 case GLSL_TYPE_FLOAT: 939 case GLSL_TYPE_BOOL: 940 if (type->is_matrix()) { 941 return type->matrix_columns; 942 } else { 943 /* Regardless of size of vector, it gets a vec4. This is bad 944 * packing for things like floats, but otherwise arrays become a 945 * mess. Hopefully a later pass over the code can pack scalars 946 * down if appropriate. 947 */ 948 return 1; 949 } 950 case GLSL_TYPE_ARRAY: 951 assert(type->length > 0); 952 return type_size(type->fields.array) * type->length; 953 case GLSL_TYPE_STRUCT: 954 size = 0; 955 for (i = 0; i < type->length; i++) { 956 size += type_size(type->fields.structure[i].type); 957 } 958 return size; 959 case GLSL_TYPE_SAMPLER: 960 /* Samplers take up one slot in UNIFORMS[], but they're baked in 961 * at link time. 
962 */ 963 return 1; 964 default: 965 assert(0); 966 return 0; 967 } 968} 969 970/** 971 * In the initial pass of codegen, we assign temporary numbers to 972 * intermediate results. (not SSA -- variable assignments will reuse 973 * storage). 974 */ 975st_src_reg 976glsl_to_tgsi_visitor::get_temp(const glsl_type *type) 977{ 978 st_src_reg src; 979 980 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; 981 src.file = PROGRAM_TEMPORARY; 982 src.index = next_temp; 983 src.reladdr = NULL; 984 next_temp += type_size(type); 985 986 if (type->is_array() || type->is_record()) { 987 src.swizzle = SWIZZLE_NOOP; 988 } else { 989 src.swizzle = swizzle_for_size(type->vector_elements); 990 } 991 src.negate = 0; 992 993 return src; 994} 995 996variable_storage * 997glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 998{ 999 1000 variable_storage *entry; 1001 1002 foreach_iter(exec_list_iterator, iter, this->variables) { 1003 entry = (variable_storage *)iter.get(); 1004 1005 if (entry->var == var) 1006 return entry; 1007 } 1008 1009 return NULL; 1010} 1011 1012void 1013glsl_to_tgsi_visitor::visit(ir_variable *ir) 1014{ 1015 if (strcmp(ir->name, "gl_FragCoord") == 0) { 1016 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 1017 1018 fp->OriginUpperLeft = ir->origin_upper_left; 1019 fp->PixelCenterInteger = ir->pixel_center_integer; 1020 } 1021 1022 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 1023 unsigned int i; 1024 const ir_state_slot *const slots = ir->state_slots; 1025 assert(ir->state_slots != NULL); 1026 1027 /* Check if this statevar's setup in the STATE file exactly 1028 * matches how we'll want to reference it as a 1029 * struct/array/whatever. If not, then we need to move it into 1030 * temporary storage and hope that it'll get copy-propagated 1031 * out. 
1032 */ 1033 for (i = 0; i < ir->num_state_slots; i++) { 1034 if (slots[i].swizzle != SWIZZLE_XYZW) { 1035 break; 1036 } 1037 } 1038 1039 variable_storage *storage; 1040 st_dst_reg dst; 1041 if (i == ir->num_state_slots) { 1042 /* We'll set the index later. */ 1043 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 1044 this->variables.push_tail(storage); 1045 1046 dst = undef_dst; 1047 } else { 1048 /* The variable_storage constructor allocates slots based on the size 1049 * of the type. However, this had better match the number of state 1050 * elements that we're going to copy into the new temporary. 1051 */ 1052 assert((int) ir->num_state_slots == type_size(ir->type)); 1053 1054 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 1055 this->next_temp); 1056 this->variables.push_tail(storage); 1057 this->next_temp += type_size(ir->type); 1058 1059 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, 1060 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); 1061 } 1062 1063 1064 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 1065 int index = _mesa_add_state_reference(this->prog->Parameters, 1066 (gl_state_index *)slots[i].tokens); 1067 1068 if (storage->file == PROGRAM_STATE_VAR) { 1069 if (storage->index == -1) { 1070 storage->index = index; 1071 } else { 1072 assert(index == storage->index + (int)i); 1073 } 1074 } else { 1075 st_src_reg src(PROGRAM_STATE_VAR, index, 1076 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); 1077 src.swizzle = slots[i].swizzle; 1078 emit(ir, TGSI_OPCODE_MOV, dst, src); 1079 /* even a float takes up a whole vec4 reg in a struct/array. 
*/ 1080 dst.index++; 1081 } 1082 } 1083 1084 if (storage->file == PROGRAM_TEMPORARY && 1085 dst.index != storage->index + (int) ir->num_state_slots) { 1086 fail_link(this->shader_program, 1087 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1088 ir->name, dst.index - storage->index, 1089 type_size(ir->type)); 1090 } 1091 } 1092} 1093 1094void 1095glsl_to_tgsi_visitor::visit(ir_loop *ir) 1096{ 1097 ir_dereference_variable *counter = NULL; 1098 1099 if (ir->counter != NULL) 1100 counter = new(ir) ir_dereference_variable(ir->counter); 1101 1102 if (ir->from != NULL) { 1103 assert(ir->counter != NULL); 1104 1105 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 1106 1107 a->accept(this); 1108 delete a; 1109 } 1110 1111 emit(NULL, TGSI_OPCODE_BGNLOOP); 1112 1113 if (ir->to) { 1114 ir_expression *e = 1115 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 1116 counter, ir->to); 1117 ir_if *if_stmt = new(ir) ir_if(e); 1118 1119 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 1120 1121 if_stmt->then_instructions.push_tail(brk); 1122 1123 if_stmt->accept(this); 1124 1125 delete if_stmt; 1126 delete e; 1127 delete brk; 1128 } 1129 1130 visit_exec_list(&ir->body_instructions, this); 1131 1132 if (ir->increment) { 1133 ir_expression *e = 1134 new(ir) ir_expression(ir_binop_add, counter->type, 1135 counter, ir->increment); 1136 1137 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 1138 1139 a->accept(this); 1140 delete a; 1141 delete e; 1142 } 1143 1144 emit(NULL, TGSI_OPCODE_ENDLOOP); 1145} 1146 1147void 1148glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 1149{ 1150 switch (ir->mode) { 1151 case ir_loop_jump::jump_break: 1152 emit(NULL, TGSI_OPCODE_BRK); 1153 break; 1154 case ir_loop_jump::jump_continue: 1155 emit(NULL, TGSI_OPCODE_CONT); 1156 break; 1157 } 1158} 1159 1160 1161void 1162glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1163{ 1164 assert(0); 1165 (void)ir; 1166} 1167 1168void 
1169glsl_to_tgsi_visitor::visit(ir_function *ir) 1170{ 1171 /* Ignore function bodies other than main() -- we shouldn't see calls to 1172 * them since they should all be inlined before we get to glsl_to_tgsi. 1173 */ 1174 if (strcmp(ir->name, "main") == 0) { 1175 const ir_function_signature *sig; 1176 exec_list empty; 1177 1178 sig = ir->matching_signature(&empty); 1179 1180 assert(sig); 1181 1182 foreach_iter(exec_list_iterator, iter, sig->body) { 1183 ir_instruction *ir = (ir_instruction *)iter.get(); 1184 1185 ir->accept(this); 1186 } 1187 } 1188} 1189 1190bool 1191glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1192{ 1193 int nonmul_operand = 1 - mul_operand; 1194 st_src_reg a, b, c; 1195 st_dst_reg result_dst; 1196 1197 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1198 if (!expr || expr->operation != ir_binop_mul) 1199 return false; 1200 1201 expr->operands[0]->accept(this); 1202 a = this->result; 1203 expr->operands[1]->accept(this); 1204 b = this->result; 1205 ir->operands[nonmul_operand]->accept(this); 1206 c = this->result; 1207 1208 this->result = get_temp(ir->type); 1209 result_dst = st_dst_reg(this->result); 1210 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1211 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1212 1213 return true; 1214} 1215 1216/** 1217 * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) 1218 * 1219 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 1220 * implemented using multiplication, and logical-or is implemented using 1221 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 1222 * As result, the logical expression (a & !b) can be rewritten as: 1223 * 1224 * - a * !b 1225 * - a * (1 - b) 1226 * - (a * 1) - (a * b) 1227 * - a + -(a * b) 1228 * - a + (a * -b) 1229 * 1230 * This final expression can be implemented as a single MAD(a, -b, a) 1231 * instruction. 
1232 */ 1233bool 1234glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 1235{ 1236 const int other_operand = 1 - try_operand; 1237 st_src_reg a, b; 1238 1239 ir_expression *expr = ir->operands[try_operand]->as_expression(); 1240 if (!expr || expr->operation != ir_unop_logic_not) 1241 return false; 1242 1243 ir->operands[other_operand]->accept(this); 1244 a = this->result; 1245 expr->operands[0]->accept(this); 1246 b = this->result; 1247 1248 b.negate = ~b.negate; 1249 1250 this->result = get_temp(ir->type); 1251 emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); 1252 1253 return true; 1254} 1255 1256bool 1257glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) 1258{ 1259 /* Saturates were only introduced to vertex programs in 1260 * NV_vertex_program3, so don't give them to drivers in the VP. 1261 */ 1262 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) 1263 return false; 1264 1265 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 1266 if (!sat_src) 1267 return false; 1268 1269 sat_src->accept(this); 1270 st_src_reg src = this->result; 1271 1272 /* If we generated an expression instruction into a temporary in 1273 * processing the saturate's operand, apply the saturate to that 1274 * instruction. Otherwise, generate a MOV to do the saturate. 1275 * 1276 * Note that we have to be careful to only do this optimization if 1277 * the instruction in question was what generated src->result. For 1278 * example, ir_dereference_array might generate a MUL instruction 1279 * to create the reladdr, and return us a src reg using that 1280 * reladdr. That MUL result is not the value we're trying to 1281 * saturate. 
1282 */ 1283 ir_expression *sat_src_expr = sat_src->as_expression(); 1284 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || 1285 sat_src_expr->operation == ir_binop_add || 1286 sat_src_expr->operation == ir_binop_dot)) { 1287 glsl_to_tgsi_instruction *new_inst; 1288 new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 1289 new_inst->saturate = true; 1290 } else { 1291 this->result = get_temp(ir->type); 1292 st_dst_reg result_dst = st_dst_reg(this->result); 1293 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1294 glsl_to_tgsi_instruction *inst; 1295 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); 1296 inst->saturate = true; 1297 } 1298 1299 return true; 1300} 1301 1302void 1303glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1304 st_src_reg *reg, int *num_reladdr) 1305{ 1306 if (!reg->reladdr) 1307 return; 1308 1309 emit_arl(ir, address_reg, *reg->reladdr); 1310 1311 if (*num_reladdr != 1) { 1312 st_src_reg temp = get_temp(glsl_type::vec4_type); 1313 1314 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); 1315 *reg = temp; 1316 } 1317 1318 (*num_reladdr)--; 1319} 1320 1321void 1322glsl_to_tgsi_visitor::visit(ir_expression *ir) 1323{ 1324 unsigned int operand; 1325 st_src_reg op[Elements(ir->operands)]; 1326 st_src_reg result_src; 1327 st_dst_reg result_dst; 1328 1329 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) 1330 */ 1331 if (ir->operation == ir_binop_add) { 1332 if (try_emit_mad(ir, 1)) 1333 return; 1334 if (try_emit_mad(ir, 0)) 1335 return; 1336 } 1337 1338 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 1339 */ 1340 if (ir->operation == ir_binop_logic_and) { 1341 if (try_emit_mad_for_and_not(ir, 1)) 1342 return; 1343 if (try_emit_mad_for_and_not(ir, 0)) 1344 return; 1345 } 1346 1347 if (try_emit_sat(ir)) 1348 return; 1349 1350 if (ir->operation == ir_quadop_vector) 1351 assert(!"ir_quadop_vector should have been lowered"); 1352 1353 for (operand = 0; operand < 
ir->get_num_operands(); operand++) { 1354 this->result.file = PROGRAM_UNDEFINED; 1355 ir->operands[operand]->accept(this); 1356 if (this->result.file == PROGRAM_UNDEFINED) { 1357 ir_print_visitor v; 1358 printf("Failed to get tree for expression operand:\n"); 1359 ir->operands[operand]->accept(&v); 1360 exit(1); 1361 } 1362 op[operand] = this->result; 1363 1364 /* Matrix expression operands should have been broken down to vector 1365 * operations already. 1366 */ 1367 assert(!ir->operands[operand]->type->is_matrix()); 1368 } 1369 1370 int vector_elements = ir->operands[0]->type->vector_elements; 1371 if (ir->operands[1]) { 1372 vector_elements = MAX2(vector_elements, 1373 ir->operands[1]->type->vector_elements); 1374 } 1375 1376 this->result.file = PROGRAM_UNDEFINED; 1377 1378 /* Storage for our result. Ideally for an assignment we'd be using 1379 * the actual storage for the result here, instead. 1380 */ 1381 result_src = get_temp(ir->type); 1382 /* convenience for the emit functions below. */ 1383 result_dst = st_dst_reg(result_src); 1384 /* Limit writes to the channels that will be used by result_src later. 1385 * This does limit this temp's use as a temporary for multi-instruction 1386 * sequences. 1387 */ 1388 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1389 1390 switch (ir->operation) { 1391 case ir_unop_logic_not: 1392 if (result_dst.type != GLSL_TYPE_FLOAT) 1393 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1394 else { 1395 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 1396 * older GPUs implement SEQ using multiple instructions (i915 uses two 1397 * SGE instructions and a MUL instruction). Since our logic values are 1398 * 0.0 and 1.0, 1-x also implements !x. 
1399 */ 1400 op[0].negate = ~op[0].negate; 1401 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); 1402 } 1403 break; 1404 case ir_unop_neg: 1405 assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); 1406 if (result_dst.type == GLSL_TYPE_INT) 1407 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1408 else { 1409 op[0].negate = ~op[0].negate; 1410 result_src = op[0]; 1411 } 1412 break; 1413 case ir_unop_abs: 1414 assert(result_dst.type == GLSL_TYPE_FLOAT); 1415 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); 1416 break; 1417 case ir_unop_sign: 1418 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); 1419 break; 1420 case ir_unop_rcp: 1421 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); 1422 break; 1423 1424 case ir_unop_exp2: 1425 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); 1426 break; 1427 case ir_unop_exp: 1428 case ir_unop_log: 1429 assert(!"not reached: should be handled by ir_explog_to_explog2"); 1430 break; 1431 case ir_unop_log2: 1432 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); 1433 break; 1434 case ir_unop_sin: 1435 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1436 break; 1437 case ir_unop_cos: 1438 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1439 break; 1440 case ir_unop_sin_reduced: 1441 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1442 break; 1443 case ir_unop_cos_reduced: 1444 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1445 break; 1446 1447 case ir_unop_dFdx: 1448 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); 1449 break; 1450 case ir_unop_dFdy: 1451 op[0].negate = ~op[0].negate; 1452 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); 1453 break; 1454 1455 case ir_unop_noise: { 1456 /* At some point, a motivated person could add a better 1457 * implementation of noise. Currently not even the nvidia 1458 * binary drivers do anything more than this. In any case, the 1459 * place to do this is in the GL state tracker, not the poor 1460 * driver. 
1461 */ 1462 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); 1463 break; 1464 } 1465 1466 case ir_binop_add: 1467 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1468 break; 1469 case ir_binop_sub: 1470 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); 1471 break; 1472 1473 case ir_binop_mul: 1474 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1475 break; 1476 case ir_binop_div: 1477 if (result_dst.type == GLSL_TYPE_FLOAT) 1478 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1479 else 1480 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); 1481 break; 1482 case ir_binop_mod: 1483 if (result_dst.type == GLSL_TYPE_FLOAT) 1484 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1485 else 1486 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); 1487 break; 1488 1489 case ir_binop_less: 1490 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); 1491 break; 1492 case ir_binop_greater: 1493 emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); 1494 break; 1495 case ir_binop_lequal: 1496 emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); 1497 break; 1498 case ir_binop_gequal: 1499 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); 1500 break; 1501 case ir_binop_equal: 1502 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1503 break; 1504 case ir_binop_nequal: 1505 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1506 break; 1507 case ir_binop_all_equal: 1508 /* "==" operator producing a scalar boolean. */ 1509 if (ir->operands[0]->type->is_vector() || 1510 ir->operands[1]->type->is_vector()) { 1511 st_src_reg temp = get_temp(native_integers ? 
1512 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1513 glsl_type::vec4_type); 1514 1515 if (native_integers) { 1516 st_dst_reg temp_dst = st_dst_reg(temp); 1517 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1518 1519 emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); 1520 1521 /* Emit 1-3 AND operations to combine the SEQ results. */ 1522 switch (ir->operands[0]->type->vector_elements) { 1523 case 2: 1524 break; 1525 case 3: 1526 temp_dst.writemask = WRITEMASK_Y; 1527 temp1.swizzle = SWIZZLE_YYYY; 1528 temp2.swizzle = SWIZZLE_ZZZZ; 1529 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1530 break; 1531 case 4: 1532 temp_dst.writemask = WRITEMASK_X; 1533 temp1.swizzle = SWIZZLE_XXXX; 1534 temp2.swizzle = SWIZZLE_YYYY; 1535 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1536 temp_dst.writemask = WRITEMASK_Y; 1537 temp1.swizzle = SWIZZLE_ZZZZ; 1538 temp2.swizzle = SWIZZLE_WWWW; 1539 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1540 } 1541 1542 temp1.swizzle = SWIZZLE_XXXX; 1543 temp2.swizzle = SWIZZLE_YYYY; 1544 emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); 1545 } else { 1546 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1547 1548 /* After the dot-product, the value will be an integer on the 1549 * range [0,4]. Zero becomes 1.0, and positive values become zero. 1550 */ 1551 emit_dp(ir, result_dst, temp, temp, vector_elements); 1552 1553 /* Negating the result of the dot-product gives values on the range 1554 * [-4, 0]. Zero becomes 1.0, and negative values become zero. 1555 * This is achieved using SGE. 1556 */ 1557 st_src_reg sge_src = result_src; 1558 sge_src.negate = ~sge_src.negate; 1559 emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); 1560 } 1561 } else { 1562 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1563 } 1564 break; 1565 case ir_binop_any_nequal: 1566 /* "!=" operator producing a scalar boolean. 
*/ 1567 if (ir->operands[0]->type->is_vector() || 1568 ir->operands[1]->type->is_vector()) { 1569 st_src_reg temp = get_temp(native_integers ? 1570 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1571 glsl_type::vec4_type); 1572 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1573 1574 if (native_integers) { 1575 st_dst_reg temp_dst = st_dst_reg(temp); 1576 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1577 1578 /* Emit 1-3 OR operations to combine the SNE results. */ 1579 switch (ir->operands[0]->type->vector_elements) { 1580 case 2: 1581 break; 1582 case 3: 1583 temp_dst.writemask = WRITEMASK_Y; 1584 temp1.swizzle = SWIZZLE_YYYY; 1585 temp2.swizzle = SWIZZLE_ZZZZ; 1586 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1587 break; 1588 case 4: 1589 temp_dst.writemask = WRITEMASK_X; 1590 temp1.swizzle = SWIZZLE_XXXX; 1591 temp2.swizzle = SWIZZLE_YYYY; 1592 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1593 temp_dst.writemask = WRITEMASK_Y; 1594 temp1.swizzle = SWIZZLE_ZZZZ; 1595 temp2.swizzle = SWIZZLE_WWWW; 1596 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1597 } 1598 1599 temp1.swizzle = SWIZZLE_XXXX; 1600 temp2.swizzle = SWIZZLE_YYYY; 1601 emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); 1602 } else { 1603 /* After the dot-product, the value will be an integer on the 1604 * range [0,4]. Zero stays zero, and positive values become 1.0. 1605 */ 1606 glsl_to_tgsi_instruction *const dp = 1607 emit_dp(ir, result_dst, temp, temp, vector_elements); 1608 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1609 /* The clamping to [0,1] can be done for free in the fragment 1610 * shader with a saturate. 1611 */ 1612 dp->saturate = true; 1613 } else { 1614 /* Negating the result of the dot-product gives values on the range 1615 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1616 * achieved using SLT. 
1617 */ 1618 st_src_reg slt_src = result_src; 1619 slt_src.negate = ~slt_src.negate; 1620 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1621 } 1622 } 1623 } else { 1624 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1625 } 1626 break; 1627 1628 case ir_unop_any: { 1629 assert(ir->operands[0]->type->is_vector()); 1630 1631 /* After the dot-product, the value will be an integer on the 1632 * range [0,4]. Zero stays zero, and positive values become 1.0. 1633 */ 1634 glsl_to_tgsi_instruction *const dp = 1635 emit_dp(ir, result_dst, op[0], op[0], 1636 ir->operands[0]->type->vector_elements); 1637 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 1638 result_dst.type == GLSL_TYPE_FLOAT) { 1639 /* The clamping to [0,1] can be done for free in the fragment 1640 * shader with a saturate. 1641 */ 1642 dp->saturate = true; 1643 } else if (result_dst.type == GLSL_TYPE_FLOAT) { 1644 /* Negating the result of the dot-product gives values on the range 1645 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1646 * is achieved using SLT. 1647 */ 1648 st_src_reg slt_src = result_src; 1649 slt_src.negate = ~slt_src.negate; 1650 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1651 } 1652 else { 1653 /* Use SNE 0 if integers are being used as boolean values. */ 1654 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); 1655 } 1656 break; 1657 } 1658 1659 case ir_binop_logic_xor: 1660 if (native_integers) 1661 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1662 else 1663 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1664 break; 1665 1666 case ir_binop_logic_or: { 1667 if (native_integers) { 1668 /* If integers are used as booleans, we can use an actual "or" 1669 * instruction. 1670 */ 1671 assert(native_integers); 1672 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1673 } else { 1674 /* After the addition, the value will be an integer on the 1675 * range [0,2]. 
Zero stays zero, and positive values become 1.0. 1676 */ 1677 glsl_to_tgsi_instruction *add = 1678 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1679 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1680 /* The clamping to [0,1] can be done for free in the fragment 1681 * shader with a saturate if floats are being used as boolean values. 1682 */ 1683 add->saturate = true; 1684 } else { 1685 /* Negating the result of the addition gives values on the range 1686 * [-2, 0]. Zero stays zero, and negative values become 1.0. This 1687 * is achieved using SLT. 1688 */ 1689 st_src_reg slt_src = result_src; 1690 slt_src.negate = ~slt_src.negate; 1691 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1692 } 1693 } 1694 break; 1695 } 1696 1697 case ir_binop_logic_and: 1698 /* If native integers are disabled, the bool args are stored as float 0.0 1699 * or 1.0, so "mul" gives us "and". If they're enabled, just use the 1700 * actual AND opcode. 1701 */ 1702 if (native_integers) 1703 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1704 else 1705 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1706 break; 1707 1708 case ir_binop_dot: 1709 assert(ir->operands[0]->type->is_vector()); 1710 assert(ir->operands[0]->type == ir->operands[1]->type); 1711 emit_dp(ir, result_dst, op[0], op[1], 1712 ir->operands[0]->type->vector_elements); 1713 break; 1714 1715 case ir_unop_sqrt: 1716 /* sqrt(x) = x * rsq(x). */ 1717 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1718 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); 1719 /* For incoming channels <= 0, set the result to 0. 
*/ 1720 op[0].negate = ~op[0].negate; 1721 emit(ir, TGSI_OPCODE_CMP, result_dst, 1722 op[0], result_src, st_src_reg_for_float(0.0)); 1723 break; 1724 case ir_unop_rsq: 1725 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1726 break; 1727 case ir_unop_i2f: 1728 if (native_integers) { 1729 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1730 break; 1731 } 1732 /* fallthrough to next case otherwise */ 1733 case ir_unop_b2f: 1734 if (native_integers) { 1735 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); 1736 break; 1737 } 1738 /* fallthrough to next case otherwise */ 1739 case ir_unop_i2u: 1740 case ir_unop_u2i: 1741 /* Converting between signed and unsigned integers is a no-op. */ 1742 result_src = op[0]; 1743 break; 1744 case ir_unop_b2i: 1745 if (native_integers) { 1746 /* Booleans are stored as integers using ~0 for true and 0 for false. 1747 * GLSL requires that int(bool) return 1 for true and 0 for false. 1748 * This conversion is done with AND, but it could be done with NEG. 1749 */ 1750 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); 1751 } else { 1752 /* Booleans and integers are both stored as floats when native 1753 * integers are disabled. 
1754 */ 1755 result_src = op[0]; 1756 } 1757 break; 1758 case ir_unop_f2i: 1759 if (native_integers) 1760 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1761 else 1762 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1763 break; 1764 case ir_unop_f2b: 1765 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1766 break; 1767 case ir_unop_i2b: 1768 if (native_integers) 1769 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1770 else 1771 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1772 break; 1773 case ir_unop_trunc: 1774 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1775 break; 1776 case ir_unop_ceil: 1777 op[0].negate = ~op[0].negate; 1778 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1779 result_src.negate = ~result_src.negate; 1780 break; 1781 case ir_unop_floor: 1782 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1783 break; 1784 case ir_unop_round_even: 1785 emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); 1786 break; 1787 case ir_unop_fract: 1788 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 1789 break; 1790 1791 case ir_binop_min: 1792 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 1793 break; 1794 case ir_binop_max: 1795 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 1796 break; 1797 case ir_binop_pow: 1798 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 1799 break; 1800 1801 case ir_unop_bit_not: 1802 if (native_integers) { 1803 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1804 break; 1805 } 1806 case ir_unop_u2f: 1807 if (native_integers) { 1808 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 1809 break; 1810 } 1811 case ir_binop_lshift: 1812 if (native_integers) { 1813 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); 1814 break; 1815 } 1816 case ir_binop_rshift: 1817 if (native_integers) { 1818 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); 1819 break; 1820 } 1821 case ir_binop_bit_and: 1822 if (native_integers) { 1823 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1824 
break; 1825 } 1826 case ir_binop_bit_xor: 1827 if (native_integers) { 1828 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1829 break; 1830 } 1831 case ir_binop_bit_or: 1832 if (native_integers) { 1833 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1834 break; 1835 } 1836 1837 assert(!"GLSL 1.30 features unsupported"); 1838 break; 1839 1840 case ir_quadop_vector: 1841 /* This operation should have already been handled. 1842 */ 1843 assert(!"Should not get here."); 1844 break; 1845 } 1846 1847 this->result = result_src; 1848} 1849 1850 1851void 1852glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 1853{ 1854 st_src_reg src; 1855 int i; 1856 int swizzle[4]; 1857 1858 /* Note that this is only swizzles in expressions, not those on the left 1859 * hand side of an assignment, which do write masking. See ir_assignment 1860 * for that. 1861 */ 1862 1863 ir->val->accept(this); 1864 src = this->result; 1865 assert(src.file != PROGRAM_UNDEFINED); 1866 1867 for (i = 0; i < 4; i++) { 1868 if (i < ir->type->vector_elements) { 1869 switch (i) { 1870 case 0: 1871 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1872 break; 1873 case 1: 1874 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1875 break; 1876 case 2: 1877 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1878 break; 1879 case 3: 1880 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1881 break; 1882 } 1883 } else { 1884 /* If the type is smaller than a vec4, replicate the last 1885 * channel out. 
1886 */ 1887 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1888 } 1889 } 1890 1891 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1892 1893 this->result = src; 1894} 1895 1896void 1897glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 1898{ 1899 variable_storage *entry = find_variable_storage(ir->var); 1900 ir_variable *var = ir->var; 1901 1902 if (!entry) { 1903 switch (var->mode) { 1904 case ir_var_uniform: 1905 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1906 var->location); 1907 this->variables.push_tail(entry); 1908 break; 1909 case ir_var_in: 1910 case ir_var_inout: 1911 /* The linker assigns locations for varyings and attributes, 1912 * including deprecated builtins (like gl_Color), user-assign 1913 * generic attributes (glBindVertexLocation), and 1914 * user-defined varyings. 1915 * 1916 * FINISHME: We would hit this path for function arguments. Fix! 1917 */ 1918 assert(var->location != -1); 1919 entry = new(mem_ctx) variable_storage(var, 1920 PROGRAM_INPUT, 1921 var->location); 1922 break; 1923 case ir_var_out: 1924 assert(var->location != -1); 1925 entry = new(mem_ctx) variable_storage(var, 1926 PROGRAM_OUTPUT, 1927 var->location); 1928 break; 1929 case ir_var_system_value: 1930 entry = new(mem_ctx) variable_storage(var, 1931 PROGRAM_SYSTEM_VALUE, 1932 var->location); 1933 break; 1934 case ir_var_auto: 1935 case ir_var_temporary: 1936 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1937 this->next_temp); 1938 this->variables.push_tail(entry); 1939 1940 next_temp += type_size(var->type); 1941 break; 1942 } 1943 1944 if (!entry) { 1945 printf("Failed to make storage for %s\n", var->name); 1946 exit(1); 1947 } 1948 } 1949 1950 this->result = st_src_reg(entry->file, entry->index, var->type); 1951 if (!native_integers) 1952 this->result.type = GLSL_TYPE_FLOAT; 1953} 1954 1955void 1956glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 1957{ 1958 ir_constant *index; 1959 st_src_reg src; 
1960 int element_size = type_size(ir->type); 1961 1962 index = ir->array_index->constant_expression_value(); 1963 1964 ir->array->accept(this); 1965 src = this->result; 1966 1967 if (index) { 1968 src.index += index->value.i[0] * element_size; 1969 } else { 1970 /* Variable index array dereference. It eats the "vec4" of the 1971 * base of the array and an index that offsets the TGSI register 1972 * index. 1973 */ 1974 ir->array_index->accept(this); 1975 1976 st_src_reg index_reg; 1977 1978 if (element_size == 1) { 1979 index_reg = this->result; 1980 } else { 1981 index_reg = get_temp(native_integers ? 1982 glsl_type::int_type : glsl_type::float_type); 1983 1984 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 1985 this->result, st_src_reg_for_type(index_reg.type, element_size)); 1986 } 1987 1988 /* If there was already a relative address register involved, add the 1989 * new and the old together to get the new offset. 1990 */ 1991 if (src.reladdr != NULL) { 1992 st_src_reg accum_reg = get_temp(native_integers ? 1993 glsl_type::int_type : glsl_type::float_type); 1994 1995 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), 1996 index_reg, *src.reladdr); 1997 1998 index_reg = accum_reg; 1999 } 2000 2001 src.reladdr = ralloc(mem_ctx, st_src_reg); 2002 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 2003 } 2004 2005 /* If the type is smaller than a vec4, replicate the last channel out. 
*/ 2006 if (ir->type->is_scalar() || ir->type->is_vector()) 2007 src.swizzle = swizzle_for_size(ir->type->vector_elements); 2008 else 2009 src.swizzle = SWIZZLE_NOOP; 2010 2011 this->result = src; 2012} 2013 2014void 2015glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 2016{ 2017 unsigned int i; 2018 const glsl_type *struct_type = ir->record->type; 2019 int offset = 0; 2020 2021 ir->record->accept(this); 2022 2023 for (i = 0; i < struct_type->length; i++) { 2024 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 2025 break; 2026 offset += type_size(struct_type->fields.structure[i].type); 2027 } 2028 2029 /* If the type is smaller than a vec4, replicate the last channel out. */ 2030 if (ir->type->is_scalar() || ir->type->is_vector()) 2031 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 2032 else 2033 this->result.swizzle = SWIZZLE_NOOP; 2034 2035 this->result.index += offset; 2036} 2037 2038/** 2039 * We want to be careful in assignment setup to hit the actual storage 2040 * instead of potentially using a temporary like we might with the 2041 * ir_dereference handler. 2042 */ 2043static st_dst_reg 2044get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) 2045{ 2046 /* The LHS must be a dereference. If the LHS is a variable indexed array 2047 * access of a vector, it must be separated into a series conditional moves 2048 * before reaching this point (see ir_vec_index_to_cond_assign). 2049 */ 2050 assert(ir->as_dereference()); 2051 ir_dereference_array *deref_array = ir->as_dereference_array(); 2052 if (deref_array) { 2053 assert(!deref_array->array->type->is_vector()); 2054 } 2055 2056 /* Use the rvalue deref handler for the most part. We'll ignore 2057 * swizzles in it and write swizzles using writemask, though. 
2058 */ 2059 ir->accept(v); 2060 return st_dst_reg(v->result); 2061} 2062 2063/** 2064 * Process the condition of a conditional assignment 2065 * 2066 * Examines the condition of a conditional assignment to generate the optimal 2067 * first operand of a \c CMP instruction. If the condition is a relational 2068 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 2069 * used as the source for the \c CMP instruction. Otherwise the comparison 2070 * is processed to a boolean result, and the boolean result is used as the 2071 * operand to the CMP instruction. 2072 */ 2073bool 2074glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 2075{ 2076 ir_rvalue *src_ir = ir; 2077 bool negate = true; 2078 bool switch_order = false; 2079 2080 ir_expression *const expr = ir->as_expression(); 2081 if ((expr != NULL) && (expr->get_num_operands() == 2)) { 2082 bool zero_on_left = false; 2083 2084 if (expr->operands[0]->is_zero()) { 2085 src_ir = expr->operands[1]; 2086 zero_on_left = true; 2087 } else if (expr->operands[1]->is_zero()) { 2088 src_ir = expr->operands[0]; 2089 zero_on_left = false; 2090 } 2091 2092 /* a is - 0 + - 0 + 2093 * (a < 0) T F F ( a < 0) T F F 2094 * (0 < a) F F T (-a < 0) F F T 2095 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) 2096 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) 2097 * (a > 0) F F T (-a < 0) F F T 2098 * (0 > a) T F F ( a < 0) T F F 2099 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 2100 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 2101 * 2102 * Note that exchanging the order of 0 and 'a' in the comparison simply 2103 * means that the value of 'a' should be negated. 
       */
      if (src_ir != ir) {
         switch (expr->operation) {
         case ir_binop_less:
            switch_order = false;
            negate = zero_on_left;
            break;

         case ir_binop_greater:
            switch_order = false;
            negate = !zero_on_left;
            break;

         case ir_binop_lequal:
            switch_order = true;
            negate = !zero_on_left;
            break;

         case ir_binop_gequal:
            switch_order = true;
            negate = zero_on_left;
            break;

         default:
            /* This isn't the right kind of comparison after all, so make sure
             * the whole condition is visited.
             */
            src_ir = ir;
            break;
         }
      }
   }

   src_ir->accept(this);

   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
    * condition we produced is 0.0 or 1.0. By flipping the sign, we can
    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
    * computing the condition.
    */
   if (negate)
      this->result.negate = ~this->result.negate;

   return switch_order;
}

/**
 * Emit TGSI for an assignment.
 *
 * The RHS is visited first, then the destination is resolved with
 * get_assignment_lhs(). One of three code shapes is emitted:
 *  - conditional assignment: one CMP per register of the LHS type, selecting
 *    between the RHS and the current LHS value;
 *  - the RHS is an expression whose last emitted instruction is the tail of
 *    the instruction list, covers one register and has a matching writemask:
 *    that instruction is re-emitted with the assignment target as destination
 *    and the original is marked dead;
 *  - otherwise: one MOV per register of the LHS type.
 */
void
glsl_to_tgsi_visitor::visit(ir_assignment *ir)
{
   st_dst_reg l;
   st_src_reg r;
   int i;

   ir->rhs->accept(this);
   r = this->result;

   l = get_assignment_lhs(ir->lhs, this);

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below). This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      l.writemask = WRITEMASK_XYZW;
   } else if (ir->lhs->type->is_scalar() &&
              ir->lhs->variable_referenced()->mode == ir_var_out) {
      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
       * FINISHME: W component of fragment shader output zero, work correctly.
       */
      l.writemask = WRITEMASK_XYZW;
   } else {
      int swizzles[4];
      int first_enabled_chan = 0;
      int rhs_chan = 0;

      l.writemask = ir->write_mask;

      /* Remember the RHS channel feeding the first written channel; it is
       * used below as filler for the channels that are not written.
       */
      for (int i = 0; i < 4; i++) {
         if (l.writemask & (1 << i)) {
            first_enabled_chan = GET_SWZ(r.swizzle, i);
            break;
         }
      }

      /* Swizzle a small RHS vector into the channels being written.
       *
       * glsl ir treats write_mask as dictating how many channels are
       * present on the RHS while TGSI treats write_mask as just
       * showing which channels of the vec4 RHS get written.
       */
      for (int i = 0; i < 4; i++) {
         if (l.writemask & (1 << i))
            swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
         else
            swizzles[i] = first_enabled_chan;
      }
      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
                                swizzles[2], swizzles[3]);
   }

   assert(l.file != PROGRAM_UNDEFINED);
   assert(r.file != PROGRAM_UNDEFINED);

   if (ir->condition) {
      /* switch_order tells us which CMP operand slot the RHS must go in. */
      const bool switch_order = this->process_move_condition(ir->condition);
      st_src_reg condition = this->result;

      for (i = 0; i < type_size(ir->lhs->type); i++) {
         st_src_reg l_src = st_src_reg(l);
         st_src_reg condition_temp = condition;
         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);

         if (native_integers) {
            /* This is necessary because TGSI's CMP instruction expects the
             * condition to be a float, and we store booleans as integers.
             * If TGSI had a UCMP instruction or similar, this extra
             * instruction would not be necessary.
             */
            condition_temp = get_temp(glsl_type::vec4_type);
            condition.negate = 0;
            emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
            condition_temp.swizzle = condition.swizzle;
         }

         if (switch_order) {
            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
         } else {
            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
         }

         l.index++;
         r.index++;
      }
   } else if (ir->rhs->as_expression() &&
              this->instructions.get_tail() &&
              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
              type_size(ir->lhs->type) == 1 &&
              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
      /* To avoid emitting an extra MOV when assigning an expression to a
       * variable, emit the last instruction of the expression again, but
       * replace the destination register with the target of the assignment.
       * Dead code elimination will remove the original instruction.
       */
      glsl_to_tgsi_instruction *inst, *new_inst;
      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
      new_inst->saturate = inst->saturate;
      inst->dead_mask = inst->dst.writemask;
   } else {
      for (i = 0; i < type_size(ir->lhs->type); i++) {
         emit(ir, TGSI_OPCODE_MOV, l, r);
         l.index++;
         r.index++;
      }
   }
}


/**
 * Produce a source register holding the value of a constant.
 *
 * Struct, array and matrix constants are assembled in a fresh temporary with
 * one MOV per register. Scalar and vector constants are registered through
 * add_constant(); they go to PROGRAM_IMMEDIATE, or to PROGRAM_CONSTANT while
 * the elements of an array constant are being visited.
 */
void
glsl_to_tgsi_visitor::visit(ir_constant *ir)
{
   st_src_reg src;
   GLfloat stack_vals[4] = { 0 };
   gl_constant_value *values = (gl_constant_value *) stack_vals;
   GLenum gl_type = GL_NONE;
   unsigned int i;
   /* Non-zero while array elements are being visited (see the array case
    * below). NOTE(review): function-local static, i.e. shared by all visitor
    * instances -- confirm this is never reached concurrently.
    */
   static int in_array = 0;
   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;

   /* Unfortunately, 4 floats is all we can get into
    * _mesa_add_typed_unnamed_constant. So, make a temp to store an
    * aggregate constant and move each constant value into it. If we
    * get lucky, copy propagation will eliminate the extra moves.
    */
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      st_src_reg temp_base = get_temp(ir->type);
      st_dst_reg temp = st_dst_reg(temp_base);

      foreach_iter(exec_list_iterator, iter, ir->components) {
         ir_constant *field_value = (ir_constant *)iter.get();
         int size = type_size(field_value->type);

         assert(size > 0);

         field_value->accept(this);
         src = this->result;

         for (i = 0; i < (unsigned int)size; i++) {
            emit(ir, TGSI_OPCODE_MOV, temp, src);

            src.index++;
            temp.index++;
         }
      }
      this->result = temp_base;
      return;
   }

   if (ir->type->is_array()) {
      st_src_reg temp_base = get_temp(ir->type);
      st_dst_reg temp = st_dst_reg(temp_base);
      int size = type_size(ir->type->fields.array);

      assert(size > 0);
      in_array++;

      for (i = 0; i < ir->type->length; i++) {
         ir->array_elements[i]->accept(this);
         src = this->result;
         for (int j = 0; j < size; j++) {
            emit(ir, TGSI_OPCODE_MOV, temp, src);

            src.index++;
            temp.index++;
         }
      }
      this->result = temp_base;
      in_array--;
      return;
   }

   if (ir->type->is_matrix()) {
      st_src_reg mat = get_temp(ir->type);
      st_dst_reg mat_column = st_dst_reg(mat);

      /* One constant per column, each moved into its own temp register. */
      for (i = 0; i < ir->type->matrix_columns; i++) {
         assert(ir->type->base_type == GLSL_TYPE_FLOAT);
         values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];

         src = st_src_reg(file, -1, ir->type->base_type);
         src.index = add_constant(file,
                                  values,
                                  ir->type->vector_elements,
                                  GL_FLOAT,
                                  &src.swizzle);
         emit(ir, TGSI_OPCODE_MOV, mat_column, src);

         mat_column.index++;
      }

      this->result = mat;
      return;
   }

   /* Scalars and vectors: without native integers every base type is
    * converted to float before being stored.
    */
   switch (ir->type->base_type) {
   case GLSL_TYPE_FLOAT:
      gl_type = GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         values[i].f = ir->value.f[i];
      }
      break;
   case GLSL_TYPE_UINT:
      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         if (native_integers)
            values[i].u = ir->value.u[i];
         else
            values[i].f = ir->value.u[i];
      }
      break;
   case GLSL_TYPE_INT:
      gl_type = native_integers ? GL_INT : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         if (native_integers)
            values[i].i = ir->value.i[i];
         else
            values[i].f = ir->value.i[i];
      }
      break;
   case GLSL_TYPE_BOOL:
      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         if (native_integers)
            values[i].b = ir->value.b[i];
         else
            values[i].f = ir->value.b[i];
      }
      break;
   default:
      assert(!"Non-float/uint/int/bool constant");
   }

   this->result = st_src_reg(file, -1, ir->type);
   this->result.index = add_constant(file,
                                     values,
                                     ir->type->vector_elements,
                                     gl_type,
                                     &this->result.swizzle);
}

/**
 * Return the function_entry for \p sig, creating it on first use.
 *
 * A new entry gets the next signature id, PROGRAM_TEMPORARY storage for
 * every parameter of the signature, and (for non-void functions) a temporary
 * for the return value. The entry is appended to function_signatures.
 */
function_entry *
glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
{
   function_entry *entry;

   foreach_iter(exec_list_iterator, iter, this->function_signatures) {
      entry = (function_entry *)iter.get();

      if (entry->sig == sig)
         return entry;
   }

   entry = ralloc(mem_ctx, function_entry);
   entry->sig = sig;
   entry->sig_id = this->next_signature_id++;
   entry->bgn_inst = NULL;

   /* Allocate storage for all the parameters. */
   foreach_iter(exec_list_iterator, iter, sig->parameters) {
      ir_variable *param = (ir_variable *)iter.get();
      variable_storage *storage;

      storage = find_variable_storage(param);
      assert(!storage);

      storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
                                              this->next_temp);
      this->variables.push_tail(storage);

      this->next_temp += type_size(param->type);
   }

   if (!sig->return_type->is_void()) {
      entry->return_reg = get_temp(sig->return_type);
   } else {
      entry->return_reg = undef_src;
   }

   this->function_signatures.push_tail(entry);
   return entry;
}

/**
 * Emit a function call.
 *
 * 'in' and 'inout' arguments are copied into the callee's parameter storage,
 * a CAL instruction referencing the callee's function_entry is emitted, and
 * 'out' and 'inout' parameters are copied back into the argument rvalues.
 * The visitor result is set to the callee's return register.
 */
void
glsl_to_tgsi_visitor::visit(ir_call *ir)
{
   glsl_to_tgsi_instruction *call_inst;
   ir_function_signature *sig = ir->get_callee();
   function_entry *entry = get_function_signature(sig);
   int i;

   /* Process in parameters. */
   exec_list_iterator sig_iter = sig->parameters.iterator();
   foreach_iter(exec_list_iterator, iter, *ir) {
      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
      ir_variable *param = (ir_variable *)sig_iter.get();

      if (param->mode == ir_var_in ||
          param->mode == ir_var_inout) {
         variable_storage *storage = find_variable_storage(param);
         assert(storage);

         param_rval->accept(this);
         st_src_reg r = this->result;

         st_dst_reg l;
         l.file = storage->file;
         l.index = storage->index;
         l.reladdr = NULL;
         l.writemask = WRITEMASK_XYZW;
         l.cond_mask = COND_TR;

         for (i = 0; i < type_size(param->type); i++) {
            emit(ir, TGSI_OPCODE_MOV, l, r);
            l.index++;
            r.index++;
         }
      }

      sig_iter.next();
   }
   assert(!sig_iter.has_next());

   /* Emit call instruction */
   call_inst = emit(ir, TGSI_OPCODE_CAL);
   call_inst->function = entry;

   /* Process out parameters. */
   sig_iter = sig->parameters.iterator();
   foreach_iter(exec_list_iterator, iter, *ir) {
      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
      ir_variable *param = (ir_variable *)sig_iter.get();

      if (param->mode == ir_var_out ||
          param->mode == ir_var_inout) {
         variable_storage *storage = find_variable_storage(param);
         assert(storage);

         st_src_reg r;
         r.file = storage->file;
         r.index = storage->index;
         r.reladdr = NULL;
         r.swizzle = SWIZZLE_NOOP;
         r.negate = 0;

         param_rval->accept(this);
         st_dst_reg l = st_dst_reg(this->result);

         for (i = 0; i < type_size(param->type); i++) {
            emit(ir, TGSI_OPCODE_MOV, l, r);
            l.index++;
            r.index++;
         }
      }

      sig_iter.next();
   }
   assert(!sig_iter.has_next());

   /* Process return value. */
   this->result = entry->return_reg;
}

/**
 * Emit a texture instruction.
 *
 * The coordinate is copied to a vec4 temporary which is then patched in
 * place: the projector, the shadow comparator and the LOD / LOD bias are
 * slotted into its channels as required by the chosen opcode. The emitted
 * instruction is tagged with its sampler, texture target, shadow flag and
 * optional texel offset. The visitor result is a fresh vec4 temporary
 * receiving the texture result.
 */
void
glsl_to_tgsi_visitor::visit(ir_texture *ir)
{
   st_src_reg result_src, coord, lod_info, projector, dx, dy, offset;
   st_dst_reg result_dst, coord_dst;
   glsl_to_tgsi_instruction *inst = NULL;
   unsigned opcode = TGSI_OPCODE_NOP;

   if (ir->coordinate) {
      ir->coordinate->accept(this);

      /* Put our coords in a temp. We'll need to modify them for shadow,
       * projection, or LOD, so the only case we'd use it as is is if
       * we're doing plain old texturing. The optimization passes on
       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
       */
      coord = get_temp(glsl_type::vec4_type);
      coord_dst = st_dst_reg(coord);
      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
   }

   if (ir->projector) {
      ir->projector->accept(this);
      projector = this->result;
   }

   /* Storage for our result. Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(glsl_type::vec4_type);
   result_dst = st_dst_reg(result_src);

   /* Pick the opcode and evaluate the operands specific to it. */
   switch (ir->op) {
   case ir_tex:
      opcode = TGSI_OPCODE_TEX;
      break;
   case ir_txb:
      opcode = TGSI_OPCODE_TXB;
      ir->lod_info.bias->accept(this);
      lod_info = this->result;
      break;
   case ir_txl:
      opcode = TGSI_OPCODE_TXL;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      break;
   case ir_txd:
      opcode = TGSI_OPCODE_TXD;
      ir->lod_info.grad.dPdx->accept(this);
      dx = this->result;
      ir->lod_info.grad.dPdy->accept(this);
      dy = this->result;
      break;
   case ir_txs:
      opcode = TGSI_OPCODE_TXQ;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      break;
   case ir_txf:
      opcode = TGSI_OPCODE_TXF;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      if (ir->offset) {
         ir->offset->accept(this);
         offset = this->result;
      }
      break;
   }

   const glsl_type *sampler_type = ir->sampler->type;

   if (ir->projector) {
      if (opcode == TGSI_OPCODE_TEX) {
         /* Slot the projector in as the last component of the coord. */
         coord_dst.writemask = WRITEMASK_W;
         emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
         coord_dst.writemask = WRITEMASK_XYZW;
         opcode = TGSI_OPCODE_TXP;
      } else {
         st_src_reg coord_w = coord;
         coord_w.swizzle = SWIZZLE_WWWW;

         /* For the other TEX opcodes there's no projective version
          * since the last slot is taken up by LOD info. Do the
          * projective divide now.
          */
         coord_dst.writemask = WRITEMASK_W;
         emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);

         /* In the case where we have to project the coordinates "by hand,"
          * the shadow comparator value must also be projected.
          */
         st_src_reg tmp_src = coord;
         if (ir->shadow_comparitor) {
            /* Slot the shadow value in as the second to last component of the
             * coord.
             */
            ir->shadow_comparitor->accept(this);

            tmp_src = get_temp(glsl_type::vec4_type);
            st_dst_reg tmp_dst = st_dst_reg(tmp_src);

            /* Projective division not allowed for array samplers. */
            assert(!sampler_type->sampler_array);

            tmp_dst.writemask = WRITEMASK_Z;
            emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);

            tmp_dst.writemask = WRITEMASK_XY;
            emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
         }

         coord_dst.writemask = WRITEMASK_XYZ;
         emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);

         coord_dst.writemask = WRITEMASK_XYZW;
         coord.swizzle = SWIZZLE_XYZW;
      }
   }

   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
    * comparator was put in the correct place (and projected) by the code,
    * above, that handles by-hand projection.
    */
   if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
      /* Slot the shadow value in as the second to last component of the
       * coord.
       */
      ir->shadow_comparitor->accept(this);

      /* XXX This will need to be updated for cubemap array samplers. */
      if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
          sampler_type->sampler_array) {
         coord_dst.writemask = WRITEMASK_W;
      } else {
         coord_dst.writemask = WRITEMASK_Z;
      }

      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
       opcode == TGSI_OPCODE_TXF) {
      /* TGSI stores LOD or LOD bias in the last channel of the coords. */
      coord_dst.writemask = WRITEMASK_W;
      emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   /* TXD takes the two gradients as extra sources and TXQ takes the LOD
    * instead of a coordinate; every other opcode takes just the coordinate.
    */
   if (opcode == TGSI_OPCODE_TXD)
      inst = emit(ir, opcode, result_dst, coord, dx, dy);
   else if (opcode == TGSI_OPCODE_TXQ)
      inst = emit(ir, opcode, result_dst, lod_info);
   else if (opcode == TGSI_OPCODE_TXF) {
      inst = emit(ir, opcode, result_dst, coord);
   } else
      inst = emit(ir, opcode, result_dst, coord);

   if (ir->shadow_comparitor)
      inst->tex_shadow = GL_TRUE;

   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
                                                   this->shader_program,
                                                   this->prog);

   if (ir->offset) {
      /* NOTE(review): 'offset' is only evaluated in the ir_txf case above;
       * for any other op carrying an offset it is still default-constructed
       * here -- confirm that only txf can reach this with ir->offset set.
       */
      inst->tex_offset_num_offset = 1;
      inst->tex_offsets[0].Index = offset.index;
      inst->tex_offsets[0].File = offset.file;
      inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
      inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
      inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
   }

   switch (sampler_type->sampler_dimensionality) {
   case GLSL_SAMPLER_DIM_1D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_2D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_3D:
      inst->tex_target = TEXTURE_3D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      inst->tex_target = TEXTURE_CUBE_INDEX;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      inst->tex_target = TEXTURE_RECT_INDEX;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      assert(!"FINISHME: Implement ARB_texture_buffer_object");
      break;
   case GLSL_SAMPLER_DIM_EXTERNAL:
      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
      break;
   default:
      assert(!"Should not get here.");
   }

   this->result = result_src;
}

/**
 * Emit a return: copy the returned value (if any) register by register into
 * the current function's return register, then emit RET.
 */
void
glsl_to_tgsi_visitor::visit(ir_return *ir)
{
   if (ir->get_value()) {
      st_dst_reg l;
      int i;

      assert(current_function);

      ir->get_value()->accept(this);
      st_src_reg r = this->result;

      l = st_dst_reg(current_function->return_reg);

      for (i = 0; i < type_size(current_function->sig->return_type); i++) {
         emit(ir, TGSI_OPCODE_MOV, l, r);
         l.index++;
         r.index++;
      }
   }

   emit(ir, TGSI_OPCODE_RET);
}

/**
 * Emit a discard: KIL on the negated condition when the discard is
 * conditional, KILP otherwise. Marks the fragment program as using kill.
 */
void
glsl_to_tgsi_visitor::visit(ir_discard *ir)
{
   struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;

   if (ir->condition) {
      ir->condition->accept(this);
      this->result.negate = ~this->result.negate;
      emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
   } else {
      emit(ir, TGSI_OPCODE_KILP);
   }

   fp->UsesKill = GL_TRUE;
}

/**
 * Emit IF / [ELSE] / ENDIF around the then- and else-instruction lists.
 *
 * With options->EmitCondCodes set, the instruction that computed the
 * condition is flagged with cond_update and the IF tests COND_NE; otherwise
 * the IF takes the condition value as its source operand.
 */
void
glsl_to_tgsi_visitor::visit(ir_if *ir)
{
   glsl_to_tgsi_instruction *cond_inst, *if_inst;
   glsl_to_tgsi_instruction *prev_inst;

   /* Remembered so we can tell whether visiting the condition emitted
    * anything.
    */
   prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();

   ir->condition->accept(this);
   assert(this->result.file != PROGRAM_UNDEFINED);

   if (this->options->EmitCondCodes) {
      cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();

      /* See if we actually generated any instruction for generating
       * the condition. If not, then cook up a move to a temp so we
       * have something to set cond_update on.
       */
      if (cond_inst == prev_inst) {
         st_src_reg temp = get_temp(glsl_type::bool_type);
         cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
      }
      cond_inst->cond_update = GL_TRUE;

      if_inst = emit(ir->condition, TGSI_OPCODE_IF);
      if_inst->dst.cond_mask = COND_NE;
   } else {
      if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
   }

   /* NOTE(review): the other visitors never append the instruction returned
    * by emit() themselves, so emit() presumably already added if_inst to the
    * list -- confirm this extra push_tail is intended.
    */
   this->instructions.push_tail(if_inst);

   visit_exec_list(&ir->then_instructions, this);

   if (!ir->else_instructions.is_empty()) {
      emit(ir->condition, TGSI_OPCODE_ELSE);
      visit_exec_list(&ir->else_instructions, this);
   }

   if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
}

/**
 * Set up an empty visitor. Temporary and signature numbering both start at
 * 1, and the visitor gets its own ralloc context for its allocations.
 */
glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
{
   result.file = PROGRAM_UNDEFINED;
   next_temp = 1;
   next_signature_id = 1;
   num_immediates = 0;
   current_function = NULL;
   num_address_regs = 0;
   indirect_addr_temps = false;
   indirect_addr_consts = false;
   mem_ctx = ralloc_context(NULL);
}

/** Free the visitor's ralloc context. */
glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
{
   ralloc_free(mem_ctx);
}

/** C-callable wrapper that deletes a visitor. */
extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
{
   delete v;
}


/**
 * Count resources used by the given gpu program (number of texture
 * samplers, etc).
2832 */ 2833static void 2834count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2835{ 2836 v->samplers_used = 0; 2837 2838 foreach_iter(exec_list_iterator, iter, v->instructions) { 2839 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2840 2841 if (is_tex_instruction(inst->op)) { 2842 v->samplers_used |= 1 << inst->sampler; 2843 2844 prog->SamplerTargets[inst->sampler] = 2845 (gl_texture_index)inst->tex_target; 2846 if (inst->tex_shadow) { 2847 prog->ShadowSamplers |= 1 << inst->sampler; 2848 } 2849 } 2850 } 2851 2852 prog->SamplersUsed = v->samplers_used; 2853 _mesa_update_shader_textures_used(prog); 2854} 2855 2856static void 2857set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 2858 struct gl_shader_program *shader_program, 2859 const char *name, const glsl_type *type, 2860 ir_constant *val) 2861{ 2862 if (type->is_record()) { 2863 ir_constant *field_constant; 2864 2865 field_constant = (ir_constant *)val->components.get_head(); 2866 2867 for (unsigned int i = 0; i < type->length; i++) { 2868 const glsl_type *field_type = type->fields.structure[i].type; 2869 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 2870 type->fields.structure[i].name); 2871 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 2872 field_type, field_constant); 2873 field_constant = (ir_constant *)field_constant->next; 2874 } 2875 return; 2876 } 2877 2878 int loc = _mesa_get_uniform_location(ctx, shader_program, name); 2879 2880 if (loc == -1) { 2881 fail_link(shader_program, 2882 "Couldn't find uniform for initializer %s\n", name); 2883 return; 2884 } 2885 2886 for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { 2887 ir_constant *element; 2888 const glsl_type *element_type; 2889 if (type->is_array()) { 2890 element = val->array_elements[i]; 2891 element_type = type->fields.array; 2892 } else { 2893 element = val; 2894 element_type = type; 2895 } 2896 2897 void *values; 2898 2899 if (element_type->base_type == GLSL_TYPE_BOOL) { 2900 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 2901 for (unsigned int j = 0; j < element_type->components(); j++) { 2902 conv[j] = element->value.b[j]; 2903 } 2904 values = (void *)conv; 2905 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 2906 element_type->vector_elements, 2907 1); 2908 } else { 2909 values = &element->value; 2910 } 2911 2912 if (element_type->is_matrix()) { 2913 _mesa_uniform_matrix(ctx, shader_program, 2914 element_type->matrix_columns, 2915 element_type->vector_elements, 2916 loc, 1, GL_FALSE, (GLfloat *)values); 2917 } else { 2918 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 2919 values, element_type->gl_type); 2920 } 2921 2922 loc++; 2923 } 2924} 2925 2926/* 2927 * Scan/rewrite program to remove reads of custom (output) registers. 2928 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING 2929 * (for vertex shaders). 2930 * In GLSL shaders, varying vars can be read and written. 2931 * On some hardware, trying to read an output register causes trouble. 2932 * So, rewrite the program to use a temporary register in this case. 2933 * 2934 * Based on _mesa_remove_output_reads from programopt.c. 
2935 */ 2936void 2937glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) 2938{ 2939 GLuint i; 2940 GLint outputMap[VERT_RESULT_MAX]; 2941 GLint outputTypes[VERT_RESULT_MAX]; 2942 GLuint numVaryingReads = 0; 2943 GLboolean *usedTemps; 2944 GLuint firstTemp = 0; 2945 2946 usedTemps = new GLboolean[MAX_TEMPS]; 2947 if (!usedTemps) { 2948 return; 2949 } 2950 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, 2951 usedTemps, MAX_TEMPS); 2952 2953 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); 2954 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); 2955 2956 for (i = 0; i < VERT_RESULT_MAX; i++) 2957 outputMap[i] = -1; 2958 2959 /* look for instructions which read from varying vars */ 2960 foreach_iter(exec_list_iterator, iter, this->instructions) { 2961 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2962 const GLuint numSrc = num_inst_src_regs(inst->op); 2963 GLuint j; 2964 for (j = 0; j < numSrc; j++) { 2965 if (inst->src[j].file == type) { 2966 /* replace the read with a temp reg */ 2967 const GLuint var = inst->src[j].index; 2968 if (outputMap[var] == -1) { 2969 numVaryingReads++; 2970 outputMap[var] = _mesa_find_free_register(usedTemps, 2971 MAX_TEMPS, 2972 firstTemp); 2973 outputTypes[var] = inst->src[j].type; 2974 firstTemp = outputMap[var] + 1; 2975 } 2976 inst->src[j].file = PROGRAM_TEMPORARY; 2977 inst->src[j].index = outputMap[var]; 2978 } 2979 } 2980 } 2981 2982 delete [] usedTemps; 2983 2984 if (numVaryingReads == 0) 2985 return; /* nothing to be done */ 2986 2987 /* look for instructions which write to the varying vars identified above */ 2988 foreach_iter(exec_list_iterator, iter, this->instructions) { 2989 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2990 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { 2991 /* change inst to write to the temp reg, instead of the varying */ 2992 inst->dst.file = PROGRAM_TEMPORARY; 2993 inst->dst.index = 
outputMap[inst->dst.index]; 2994 } 2995 } 2996 2997 /* insert new MOV instructions at the end */ 2998 for (i = 0; i < VERT_RESULT_MAX; i++) { 2999 if (outputMap[i] >= 0) { 3000 /* MOV VAR[i], TEMP[tmp]; */ 3001 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); 3002 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); 3003 dst.index = i; 3004 this->emit(NULL, TGSI_OPCODE_MOV, dst, src); 3005 } 3006 } 3007} 3008 3009/** 3010 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 3011 * are read from the given src in this instruction 3012 */ 3013static int 3014get_src_arg_mask(st_dst_reg dst, st_src_reg src) 3015{ 3016 int read_mask = 0, comp; 3017 3018 /* Now, given the src swizzle and the written channels, find which 3019 * components are actually read 3020 */ 3021 for (comp = 0; comp < 4; ++comp) { 3022 const unsigned coord = GET_SWZ(src.swizzle, comp); 3023 ASSERT(coord < 4); 3024 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 3025 read_mask |= 1 << coord; 3026 } 3027 3028 return read_mask; 3029} 3030 3031/** 3032 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 3033 * instruction is the first instruction to write to register T0. There are 3034 * several lowering passes done in GLSL IR (e.g. branches and 3035 * relative addressing) that create a large number of conditional assignments 3036 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 3037 * 3038 * Here is why this conversion is safe: 3039 * CMP T0, T1 T2 T0 can be expanded to: 3040 * if (T1 < 0.0) 3041 * MOV T0, T2; 3042 * else 3043 * MOV T0, T0; 3044 * 3045 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 3046 * as the original program. If (T1 < 0.0) evaluates to false, executing 3047 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 
3048 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 3049 * because any instruction that was going to read from T0 after this was going 3050 * to read a garbage value anyway. 3051 */ 3052void 3053glsl_to_tgsi_visitor::simplify_cmp(void) 3054{ 3055 unsigned *tempWrites; 3056 unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; 3057 3058 tempWrites = new unsigned[MAX_TEMPS]; 3059 if (!tempWrites) { 3060 return; 3061 } 3062 memset(tempWrites, 0, sizeof(tempWrites)); 3063 memset(outputWrites, 0, sizeof(outputWrites)); 3064 3065 foreach_iter(exec_list_iterator, iter, this->instructions) { 3066 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3067 unsigned prevWriteMask = 0; 3068 3069 /* Give up if we encounter relative addressing or flow control. */ 3070 if (inst->dst.reladdr || 3071 tgsi_get_opcode_info(inst->op)->is_branch || 3072 inst->op == TGSI_OPCODE_BGNSUB || 3073 inst->op == TGSI_OPCODE_CONT || 3074 inst->op == TGSI_OPCODE_END || 3075 inst->op == TGSI_OPCODE_ENDSUB || 3076 inst->op == TGSI_OPCODE_RET) { 3077 break; 3078 } 3079 3080 if (inst->dst.file == PROGRAM_OUTPUT) { 3081 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); 3082 prevWriteMask = outputWrites[inst->dst.index]; 3083 outputWrites[inst->dst.index] |= inst->dst.writemask; 3084 } else if (inst->dst.file == PROGRAM_TEMPORARY) { 3085 assert(inst->dst.index < MAX_TEMPS); 3086 prevWriteMask = tempWrites[inst->dst.index]; 3087 tempWrites[inst->dst.index] |= inst->dst.writemask; 3088 } 3089 3090 /* For a CMP to be considered a conditional write, the destination 3091 * register and source register two must be the same. 
*/ 3092 if (inst->op == TGSI_OPCODE_CMP 3093 && !(inst->dst.writemask & prevWriteMask) 3094 && inst->src[2].file == inst->dst.file 3095 && inst->src[2].index == inst->dst.index 3096 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { 3097 3098 inst->op = TGSI_OPCODE_MOV; 3099 inst->src[0] = inst->src[1]; 3100 } 3101 } 3102 3103 delete [] tempWrites; 3104} 3105 3106/* Replaces all references to a temporary register index with another index. */ 3107void 3108glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) 3109{ 3110 foreach_iter(exec_list_iterator, iter, this->instructions) { 3111 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3112 unsigned j; 3113 3114 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3115 if (inst->src[j].file == PROGRAM_TEMPORARY && 3116 inst->src[j].index == index) { 3117 inst->src[j].index = new_index; 3118 } 3119 } 3120 3121 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3122 inst->dst.index = new_index; 3123 } 3124 } 3125} 3126 3127int 3128glsl_to_tgsi_visitor::get_first_temp_read(int index) 3129{ 3130 int depth = 0; /* loop depth */ 3131 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3132 unsigned i = 0, j; 3133 3134 foreach_iter(exec_list_iterator, iter, this->instructions) { 3135 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3136 3137 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3138 if (inst->src[j].file == PROGRAM_TEMPORARY && 3139 inst->src[j].index == index) { 3140 return (depth == 0) ? 
i : loop_start; 3141 } 3142 } 3143 3144 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3145 if(depth++ == 0) 3146 loop_start = i; 3147 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3148 if (--depth == 0) 3149 loop_start = -1; 3150 } 3151 assert(depth >= 0); 3152 3153 i++; 3154 } 3155 3156 return -1; 3157} 3158 3159int 3160glsl_to_tgsi_visitor::get_first_temp_write(int index) 3161{ 3162 int depth = 0; /* loop depth */ 3163 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3164 int i = 0; 3165 3166 foreach_iter(exec_list_iterator, iter, this->instructions) { 3167 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3168 3169 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3170 return (depth == 0) ? i : loop_start; 3171 } 3172 3173 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3174 if(depth++ == 0) 3175 loop_start = i; 3176 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3177 if (--depth == 0) 3178 loop_start = -1; 3179 } 3180 assert(depth >= 0); 3181 3182 i++; 3183 } 3184 3185 return -1; 3186} 3187 3188int 3189glsl_to_tgsi_visitor::get_last_temp_read(int index) 3190{ 3191 int depth = 0; /* loop depth */ 3192 int last = -1; /* index of last instruction that reads the temporary */ 3193 unsigned i = 0, j; 3194 3195 foreach_iter(exec_list_iterator, iter, this->instructions) { 3196 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3197 3198 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3199 if (inst->src[j].file == PROGRAM_TEMPORARY && 3200 inst->src[j].index == index) { 3201 last = (depth == 0) ? 
i : -2; 3202 } 3203 } 3204 3205 if (inst->op == TGSI_OPCODE_BGNLOOP) 3206 depth++; 3207 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3208 if (--depth == 0 && last == -2) 3209 last = i; 3210 assert(depth >= 0); 3211 3212 i++; 3213 } 3214 3215 assert(last >= -1); 3216 return last; 3217} 3218 3219int 3220glsl_to_tgsi_visitor::get_last_temp_write(int index) 3221{ 3222 int depth = 0; /* loop depth */ 3223 int last = -1; /* index of last instruction that writes to the temporary */ 3224 int i = 0; 3225 3226 foreach_iter(exec_list_iterator, iter, this->instructions) { 3227 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3228 3229 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) 3230 last = (depth == 0) ? i : -2; 3231 3232 if (inst->op == TGSI_OPCODE_BGNLOOP) 3233 depth++; 3234 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3235 if (--depth == 0 && last == -2) 3236 last = i; 3237 assert(depth >= 0); 3238 3239 i++; 3240 } 3241 3242 assert(last >= -1); 3243 return last; 3244} 3245 3246/* 3247 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 3248 * channels for copy propagation and updates following instructions to 3249 * use the original versions. 3250 * 3251 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3252 * will occur. As an example, a TXP production before this pass: 3253 * 3254 * 0: MOV TEMP[1], INPUT[4].xyyy; 3255 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3256 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 3257 * 3258 * and after: 3259 * 3260 * 0: MOV TEMP[1], INPUT[4].xyyy; 3261 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3262 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3263 * 3264 * which allows for dead code elimination on TEMP[1]'s writes. 
 *
 * Implementation notes: acp[4 * t + c] is the MOV instruction whose source
 * currently supplies channel c of TEMP[t] (NULL when no copy is known), and
 * acp_level[4 * t + c] is the IF nesting depth at which that entry was
 * recorded.
 */
void
glsl_to_tgsi_visitor::copy_propagate(void)
{
   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
                                                  glsl_to_tgsi_instruction *,
                                                  this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      assert(inst->dst.file != PROGRAM_TEMPORARY
             || inst->dst.index < this->next_temp);

      /* First, do any copy propagation possible into the src regs. */
      for (int r = 0; r < 3; r++) {
         glsl_to_tgsi_instruction *first = NULL;
         bool good = true;
         int acp_base = inst->src[r].index * 4;

         /* Only direct reads of temporaries can be replaced. */
         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference.  Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;

            /* Compose the swizzles: channel i of the new source is whatever
             * the recorded MOV read for the channel this operand selected.
             */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
                           (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_IF:
         ++level;
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         /* ELSE stays at the same nesting depth; only ENDIF leaves it. */
         if (inst->op == TGSI_OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.
          */
         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
            /* Any temporary might be written, so no copy propagation
             * across this instruction.
             */
            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         } else if (inst->dst.file == PROGRAM_OUTPUT &&
                    inst->dst.reladdr) {
            /* Any output might be written, so no copy propagation
             * from outputs across this instruction.
             */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                     acp[4 * r + c] = NULL;
               }
            }
         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
                    inst->dst.file == PROGRAM_OUTPUT) {
            /* Clear where it's used as dst. */
            if (inst->dst.file == PROGRAM_TEMPORARY) {
               for (int c = 0; c < 4; c++) {
                  if (inst->dst.writemask & (1 << c)) {
                     acp[4 * inst->dst.index + c] = NULL;
                  }
               }
            }

            /* Clear where it's used as src. */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
                      acp[4 * r + c]->src[0].index == inst->dst.index &&
                      inst->dst.writemask & (1 << src_chan))
                  {
                     acp[4 * r + c] = NULL;
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP.  Only plain MOVs qualify:
       * no relative addressing on either side, no saturate and no negate.
       */
      if (inst->op == TGSI_OPCODE_MOV &&
          inst->dst.file == PROGRAM_TEMPORARY &&
          !inst->dst.reladdr &&
          !inst->saturate &&
          !inst->src[0].reladdr &&
          !inst->src[0].negate) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst.writemask & (1 << i)) {
               acp[4 * inst->dst.index + i] = inst;
               acp_level[4 * inst->dst.index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}

/*
 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.
As an example, a TXP production after copy propagation but 3443 * before this pass: 3444 * 3445 * 0: MOV TEMP[1], INPUT[4].xyyy; 3446 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3447 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3448 * 3449 * and after this pass: 3450 * 3451 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3452 * 3453 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) 3454 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them 3455 */ 3456void 3457glsl_to_tgsi_visitor::eliminate_dead_code(void) 3458{ 3459 int i; 3460 3461 for (i=0; i < this->next_temp; i++) { 3462 int last_read = get_last_temp_read(i); 3463 int j = 0; 3464 3465 foreach_iter(exec_list_iterator, iter, this->instructions) { 3466 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3467 3468 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && 3469 j > last_read) 3470 { 3471 iter.remove(); 3472 delete inst; 3473 } 3474 3475 j++; 3476 } 3477 } 3478} 3479 3480/* 3481 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 3482 * code elimination. This is less primitive than eliminate_dead_code(), as it 3483 * is per-channel and can detect consecutive writes without a read between them 3484 * as dead code. However, there is some dead code that can be eliminated by 3485 * eliminate_dead_code() but not this function - for example, this function 3486 * cannot eliminate an instruction writing to a register that is never read and 3487 * is the only instruction writing to that register. 3488 * 3489 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3490 * will occur. 
 *
 * Returns the number of instructions removed from the instruction list.
 */
int
glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
{
   /* writes[4 * t + c] is the most recent instruction that wrote channel c of
    * TEMP[t] without that channel having been read since; write_level[] is the
    * IF nesting depth recorded for that write.
    */
   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
                                                     glsl_to_tgsi_instruction *,
                                                     this->next_temp * 4);
   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;
   int removed = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      assert(inst->dst.file != PROGRAM_TEMPORARY
             || inst->dst.index < this->next_temp);

      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
      case TGSI_OPCODE_CONT:
      case TGSI_OPCODE_BRK:
         /* End of a basic block, clear the write array entirely.
          *
          * This keeps us from killing dead code when the writes are
          * on either side of a loop, even when the register isn't touched
          * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
          * dead code of this type, so it shouldn't make a difference as long as
          * the dead code elimination pass in the GLSL compiler does its job.
          */
         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Promote the recorded level of all channels written inside the
          * preceding if or else block to the level above the if/else block.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!writes[4 * r + c])
                  continue;

               if (write_level[4 * r + c] == level)
                  write_level[4 * r + c] = level-1;
            }
         }

         /* ELSE stays at the same nesting depth; only ENDIF leaves it. */
         if(inst->op == TGSI_OPCODE_ENDIF)
            --level;

         break;

      case TGSI_OPCODE_IF:
         ++level;
         /* fallthrough to default case to mark the condition as read */

      default:
         /* Continuing the block, clear any channels from the write array that
          * are read by this instruction.
          */
         for (unsigned i = 0; i < Elements(inst->src); i++) {
            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
               /* Any temporary might be read, so no dead code elimination
                * across this instruction.
                */
               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
               /* Clear where it's used as src. */
               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);

               for (int c = 0; c < 4; c++) {
                  if (src_chans & (1 << c)) {
                     writes[4 * inst->src[i].index + c] = NULL;
                  }
               }
            }
         }
         break;
      }

      /* If this instruction writes to a temporary, add it to the write array.
       * If there is already an instruction in the write array for one or more
       * of the channels, flag that channel write as dead.
       */
      if (inst->dst.file == PROGRAM_TEMPORARY &&
          !inst->dst.reladdr &&
          !inst->saturate) {
         for (int c = 0; c < 4; c++) {
            if (inst->dst.writemask & (1 << c)) {
               if (writes[4 * inst->dst.index + c]) {
                  /* A pending write recorded at a shallower IF depth is left
                   * in place (neither flagged dead nor replaced).
                   */
                  if (write_level[4 * inst->dst.index + c] < level)
                     continue;
                  else
                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
               }
               writes[4 * inst->dst.index + c] = inst;
               write_level[4 * inst->dst.index + c] = level;
            }
         }
      }
   }

   /* Anything still in the write array at this point is dead code. */
   for (int r = 0; r < this->next_temp; r++) {
      for (int c = 0; c < 4; c++) {
         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
         if (inst)
            inst->dead_mask |= (1 << c);
      }
   }

   /* Now actually remove the instructions that are completely dead and update
    * the writemask of other instructions with dead channels.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      if (!inst->dead_mask || !inst->dst.writemask)
         continue;
      else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
         iter.remove();
         delete inst;
         removed++;
      } else
         inst->dst.writemask &= ~(inst->dead_mask);
   }

   ralloc_free(write_level);
   ralloc_free(writes);

   return removed;
}

/* Merges temporary registers together where possible to reduce the number of
 * registers needed to run a program.
 *
 * Produces optimal code only after copy propagation and dead code elimination
 * have been run. */
void
glsl_to_tgsi_visitor::merge_registers(void)
{
   int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
   int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
   int i, j;

   /* Read the indices of the last read and first write to each temp register
    * into an array so that we don't have to traverse the instruction list as
    * much. */
   for (i=0; i < this->next_temp; i++) {
      last_reads[i] = get_last_temp_read(i);
      first_writes[i] = get_first_temp_write(i);
   }

   /* Start looking for registers with non-overlapping usages that can be
    * merged together. */
   for (i=0; i < this->next_temp; i++) {
      /* Don't touch unused registers. */
      if (last_reads[i] < 0 || first_writes[i] < 0) continue;

      for (j=0; j < this->next_temp; j++) {
         /* Don't touch unused registers. */
         if (last_reads[j] < 0 || first_writes[j] < 0) continue;

         /* We can merge the two registers if the first write to j is after or
          * in the same instruction as the last read from i.  Note that the
          * register at index i will always be used earlier or at the same time
          * as the register at index j.
 */
         if (first_writes[i] <= first_writes[j] &&
             last_reads[i] <= first_writes[j])
         {
            rename_temp_register(j, i); /* Replace all references to j with i.*/

            /* Update the first_writes and last_reads arrays with the new
             * values for the merged register index, and mark the newly unused
             * register index as such. */
            last_reads[i] = last_reads[j];
            first_writes[j] = -1;
            last_reads[j] = -1;
         }
      }
   }

   ralloc_free(last_reads);
   ralloc_free(first_writes);
}

/* Reassign indices to temporary registers by reusing unused indices created
 * by optimization passes.
 *
 * Temporaries for which get_first_temp_read() reports no read are dropped;
 * the remaining ones are packed into consecutive indices starting at 0, and
 * next_temp is lowered to the number of temporaries kept. */
void
glsl_to_tgsi_visitor::renumber_registers(void)
{
   int i = 0;
   int new_index = 0;

   for (i=0; i < this->next_temp; i++) {
      if (get_first_temp_read(i) < 0) continue;
      if (i != new_index)
         rename_temp_register(i, new_index);
      new_index++;
   }

   this->next_temp = new_index;
}

/**
 * Builds a visitor whose program implements the current pixel transfer ops
 * followed by the instructions of \p original, and stores it in
 * fp->glsl_to_tgsi.
 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
 *
 * \param fp              fragment program that receives the new visitor
 * \param original        visitor holding the original shader's instructions
 * \param scale_and_bias  non-zero to emit the scale/bias MAD
 * \param pixel_maps      non-zero to emit the pixel map look-ups
 */
extern "C" void
get_pixel_transfer_visitor(struct st_fragment_program *fp,
                           glsl_to_tgsi_visitor *original,
                           int scale_and_bias, int pixel_maps)
{
   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
   struct st_context *st = st_context(original->ctx);
   struct gl_program *prog = &fp->Base.Base;
   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
   st_src_reg coord, src0;
   st_dst_reg dst0;
   glsl_to_tgsi_instruction *inst;

   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader.
 */
   v->ctx = original->ctx;
   v->prog = prog;
   v->glsl_version = original->glsl_version;
   v->native_integers = original->native_integers;
   v->options = original->options;
   v->next_temp = original->next_temp;
   v->num_address_regs = original->num_address_regs;
   v->samplers_used = prog->SamplersUsed = original->samplers_used;
   v->indirect_addr_temps = original->indirect_addr_temps;
   v->indirect_addr_consts = original->indirect_addr_consts;
   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));

   /*
    * Get initial pixel color from the texture.
    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
    */
   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
   src0 = v->get_temp(glsl_type::vec4_type);
   dst0 = st_dst_reg(src0);
   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
   inst->sampler = 0;
   inst->tex_target = TEXTURE_2D_INDEX;

   prog->InputsRead |= FRAG_BIT_TEX0;
   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
   v->samplers_used |= (1 << 0);

   if (scale_and_bias) {
      static const gl_state_index scale_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_SCALE,
           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
      static const gl_state_index bias_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_BIAS,
           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
      GLint scale_p, bias_p;
      st_src_reg scale, bias;

      scale_p = _mesa_add_state_reference(params, scale_state);
      bias_p = _mesa_add_state_reference(params, bias_state);

      /* MAD colorTemp, colorTemp, scale, bias; */
      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
   }

   if (pixel_maps) {
      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
      st_dst_reg temp_dst = st_dst_reg(temp);

      assert(st->pixel_xfer.pixelmap_texture);

      /* With a little effort, we can do four pixel map look-ups with
       * two TEX instructions:
       */

      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
      inst->sampler = 1;
      inst->tex_target = TEXTURE_2D_INDEX;

      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
      inst->sampler = 1;
      inst->tex_target = TEXTURE_2D_INDEX;

      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
      v->samplers_used |= (1 << 1);

      /* MOV colorTemp, temp; */
      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
   }

   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
    * new visitor. */
   foreach_iter(exec_list_iterator, iter, original->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
      glsl_to_tgsi_instruction *newinst;
      st_src_reg src_regs[3];

      if (inst->dst.file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);

      for (int i=0; i<3; i++) {
         src_regs[i] = inst->src[i];
         /* Reads of the primary color input are redirected to the temporary
          * holding the transferred pixel color; only file and index are
          * replaced, the operand's own swizzle is kept.
          */
         if (src_regs[i].file == PROGRAM_INPUT &&
             src_regs[i].index == FRAG_ATTRIB_COL0)
         {
            src_regs[i].file = PROGRAM_TEMPORARY;
            src_regs[i].index = src0.index;
         }
         else if (src_regs[i].file == PROGRAM_INPUT)
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

      /* NOTE(review): only tex_target is copied onto the new instruction
       * here; confirm whether other per-instruction fields (e.g. sampler)
       * need to be carried over as well.
       */
      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
      newinst->tex_target = inst->tex_target;
   }

   /* Make modifications to fragment program info. */
   prog->Parameters = _mesa_combine_parameter_lists(params,
                                                    original->prog->Parameters);
   _mesa_free_parameter_list(params);
   count_resources(v, prog);
   fp->glsl_to_tgsi = v;
}

/**
 * Make fragment program for glBitmap:
 *   Sample the texture and kill the fragment if the bit is 0.
 * This program will be combined with the user's fragment program.
 *
 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
 */
extern "C" void
get_bitmap_visitor(struct st_fragment_program *fp,
                   glsl_to_tgsi_visitor *original, int samplerIndex)
{
   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
   struct st_context *st = st_context(original->ctx);
   struct gl_program *prog = &fp->Base.Base;
   st_src_reg coord, src0;
   st_dst_reg dst0;
   glsl_to_tgsi_instruction *inst;

   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader.
 */
   v->ctx = original->ctx;
   v->prog = prog;
   v->glsl_version = original->glsl_version;
   v->native_integers = original->native_integers;
   v->options = original->options;
   v->next_temp = original->next_temp;
   v->num_address_regs = original->num_address_regs;
   v->samplers_used = prog->SamplersUsed = original->samplers_used;
   v->indirect_addr_temps = original->indirect_addr_temps;
   v->indirect_addr_consts = original->indirect_addr_consts;
   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));

   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
   src0 = v->get_temp(glsl_type::vec4_type);
   dst0 = st_dst_reg(src0);
   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
   inst->sampler = samplerIndex;
   inst->tex_target = TEXTURE_2D_INDEX;

   prog->InputsRead |= FRAG_BIT_TEX0;
   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
   v->samplers_used |= (1 << samplerIndex);

   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel>0 -> discard */
   src0.negate = NEGATE_XYZW;
   /* For the L8 bitmap format every channel is read from X. */
   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
      src0.swizzle = SWIZZLE_XXXX;
   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);

   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
    * new visitor. */
   foreach_iter(exec_list_iterator, iter, original->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
      glsl_to_tgsi_instruction *newinst;
      st_src_reg src_regs[3];

      if (inst->dst.file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);

      for (int i=0; i<3; i++) {
         src_regs[i] = inst->src[i];
         if (src_regs[i].file == PROGRAM_INPUT)
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

      /* NOTE(review): only tex_target is copied onto the new instruction
       * here; confirm whether other per-instruction fields (e.g. sampler)
       * need to be carried over as well.
       */
      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
      newinst->tex_target = inst->tex_target;
   }

   /* Make modifications to fragment program info. */
   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
   count_resources(v, prog);
   fp->glsl_to_tgsi = v;
}

/* ------------------------- TGSI conversion stuff -------------------------- */

/** One recorded branch: where it jumps to and where its token is stored. */
struct label {
   unsigned branch_target;
   unsigned token;
};

/**
 * Intermediate state used during shader translation.
 */
struct st_translate {
   struct ureg_program *ureg;

   /* Entries of temps[] are declared on first use by dst_register() and
    * src_register(); until then they are undefined registers.
    */
   struct ureg_dst temps[MAX_TEMPS];
   struct ureg_src *constants;
   struct ureg_src *immediates;
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   struct ureg_dst address[1];
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];

   /* Extra info for handling point size clamping in vertex shader */
   struct ureg_dst pointSizeResult; /**< Actual point size output register */
   struct ureg_src pointSizeConst;  /**< Point size range constant register */
   GLint pointSizeOutIndex;         /**< Temp point size output register */
   GLboolean prevInstWrotePointSize;

   /* Translate Mesa input/output indices into slots of inputs[]/outputs[]. */
   const GLuint *inputMapping;
   const GLuint *outputMapping;

   /* For every instruction that contains a label (eg CALL), keep
    * details so that we can go back afterwards and emit the correct
    * tgsi instruction number for each label.
    */
   struct label *labels;
   unsigned labels_size;   /**< allocated entries in labels[] */
   unsigned labels_count;  /**< used entries in labels[] */

   /* Keep a record of the tgsi instruction number that each mesa
    * instruction starts at, will be used to fix up labels after
    * translation.
    */
   unsigned *insn;
   unsigned insn_size;     /**< allocated entries in insn[] */
   unsigned insn_count;    /**< used entries in insn[] */

   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */

   boolean error;      /**< set when growing labels[] or insn[] fails */
};

/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
   TGSI_SEMANTIC_FACE,
   TGSI_SEMANTIC_VERTEXID,
   TGSI_SEMANTIC_INSTANCEID
};

/**
 * Make note of a branch to a label in the TGSI code.
 * After we've emitted all instructions, we'll go over the list
 * of labels built here and patch the TGSI code with the actual
 * location of each label.
3965 */ 3966static unsigned *get_label(struct st_translate *t, unsigned branch_target) 3967{ 3968 unsigned i; 3969 3970 if (t->labels_count + 1 >= t->labels_size) { 3971 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 3972 t->labels = (struct label *)realloc(t->labels, 3973 t->labels_size * sizeof(struct label)); 3974 if (t->labels == NULL) { 3975 static unsigned dummy; 3976 t->error = TRUE; 3977 return &dummy; 3978 } 3979 } 3980 3981 i = t->labels_count++; 3982 t->labels[i].branch_target = branch_target; 3983 return &t->labels[i].token; 3984} 3985 3986/** 3987 * Called prior to emitting the TGSI code for each instruction. 3988 * Allocate additional space for instructions if needed. 3989 * Update the insn[] array so the next glsl_to_tgsi_instruction points to 3990 * the next TGSI instruction. 3991 */ 3992static void set_insn_start(struct st_translate *t, unsigned start) 3993{ 3994 if (t->insn_count + 1 >= t->insn_size) { 3995 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 3996 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); 3997 if (t->insn == NULL) { 3998 t->error = TRUE; 3999 return; 4000 } 4001 } 4002 4003 t->insn[t->insn_count++] = start; 4004} 4005 4006/** 4007 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 4008 */ 4009static struct ureg_src 4010emit_immediate(struct st_translate *t, 4011 gl_constant_value values[4], 4012 int type, int size) 4013{ 4014 struct ureg_program *ureg = t->ureg; 4015 4016 switch(type) 4017 { 4018 case GL_FLOAT: 4019 return ureg_DECL_immediate(ureg, &values[0].f, size); 4020 case GL_INT: 4021 return ureg_DECL_immediate_int(ureg, &values[0].i, size); 4022 case GL_UNSIGNED_INT: 4023 case GL_BOOL: 4024 return ureg_DECL_immediate_uint(ureg, &values[0].u, size); 4025 default: 4026 assert(!"should not get here - type must be float, int, uint, or bool"); 4027 return ureg_src_undef(); 4028 } 4029} 4030 4031/** 4032 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. 
4033 */ 4034static struct ureg_dst 4035dst_register(struct st_translate *t, 4036 gl_register_file file, 4037 GLuint index) 4038{ 4039 switch(file) { 4040 case PROGRAM_UNDEFINED: 4041 return ureg_dst_undef(); 4042 4043 case PROGRAM_TEMPORARY: 4044 if (ureg_dst_is_undef(t->temps[index])) 4045 t->temps[index] = ureg_DECL_temporary(t->ureg); 4046 4047 return t->temps[index]; 4048 4049 case PROGRAM_OUTPUT: 4050 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) 4051 t->prevInstWrotePointSize = GL_TRUE; 4052 4053 if (t->procType == TGSI_PROCESSOR_VERTEX) 4054 assert(index < VERT_RESULT_MAX); 4055 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 4056 assert(index < FRAG_RESULT_MAX); 4057 else 4058 assert(index < GEOM_RESULT_MAX); 4059 4060 assert(t->outputMapping[index] < Elements(t->outputs)); 4061 4062 return t->outputs[t->outputMapping[index]]; 4063 4064 case PROGRAM_ADDRESS: 4065 return t->address[index]; 4066 4067 default: 4068 assert(!"unknown dst register file"); 4069 return ureg_dst_undef(); 4070 } 4071} 4072 4073/** 4074 * Map a glsl_to_tgsi src register to a TGSI ureg_src register. 
4075 */ 4076static struct ureg_src 4077src_register(struct st_translate *t, 4078 gl_register_file file, 4079 GLuint index) 4080{ 4081 switch(file) { 4082 case PROGRAM_UNDEFINED: 4083 return ureg_src_undef(); 4084 4085 case PROGRAM_TEMPORARY: 4086 assert(index >= 0); 4087 assert(index < Elements(t->temps)); 4088 if (ureg_dst_is_undef(t->temps[index])) 4089 t->temps[index] = ureg_DECL_temporary(t->ureg); 4090 return ureg_src(t->temps[index]); 4091 4092 case PROGRAM_NAMED_PARAM: 4093 case PROGRAM_ENV_PARAM: 4094 case PROGRAM_LOCAL_PARAM: 4095 case PROGRAM_UNIFORM: 4096 assert(index >= 0); 4097 return t->constants[index]; 4098 case PROGRAM_STATE_VAR: 4099 case PROGRAM_CONSTANT: /* ie, immediate */ 4100 if (index < 0) 4101 return ureg_DECL_constant(t->ureg, 0); 4102 else 4103 return t->constants[index]; 4104 4105 case PROGRAM_IMMEDIATE: 4106 return t->immediates[index]; 4107 4108 case PROGRAM_INPUT: 4109 assert(t->inputMapping[index] < Elements(t->inputs)); 4110 return t->inputs[t->inputMapping[index]]; 4111 4112 case PROGRAM_OUTPUT: 4113 assert(t->outputMapping[index] < Elements(t->outputs)); 4114 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 4115 4116 case PROGRAM_ADDRESS: 4117 return ureg_src(t->address[index]); 4118 4119 case PROGRAM_SYSTEM_VALUE: 4120 assert(index < Elements(t->systemValues)); 4121 return t->systemValues[index]; 4122 4123 default: 4124 assert(!"unknown src register file"); 4125 return ureg_src_undef(); 4126 } 4127} 4128 4129/** 4130 * Create a TGSI ureg_dst register from an st_dst_reg. 
4131 */ 4132static struct ureg_dst 4133translate_dst(struct st_translate *t, 4134 const st_dst_reg *dst_reg, 4135 bool saturate) 4136{ 4137 struct ureg_dst dst = dst_register(t, 4138 dst_reg->file, 4139 dst_reg->index); 4140 4141 dst = ureg_writemask(dst, dst_reg->writemask); 4142 4143 if (saturate) 4144 dst = ureg_saturate(dst); 4145 4146 if (dst_reg->reladdr != NULL) 4147 dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); 4148 4149 return dst; 4150} 4151 4152/** 4153 * Create a TGSI ureg_src register from an st_src_reg. 4154 */ 4155static struct ureg_src 4156translate_src(struct st_translate *t, const st_src_reg *src_reg) 4157{ 4158 struct ureg_src src = src_register(t, src_reg->file, src_reg->index); 4159 4160 src = ureg_swizzle(src, 4161 GET_SWZ(src_reg->swizzle, 0) & 0x3, 4162 GET_SWZ(src_reg->swizzle, 1) & 0x3, 4163 GET_SWZ(src_reg->swizzle, 2) & 0x3, 4164 GET_SWZ(src_reg->swizzle, 3) & 0x3); 4165 4166 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 4167 src = ureg_negate(src); 4168 4169 if (src_reg->reladdr != NULL) { 4170 /* Normally ureg_src_indirect() would be used here, but a stupid compiler 4171 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 4172 * set the bit for src.Negate. So we have to do the operation manually 4173 * here to work around the compiler's problems. */ 4174 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ 4175 struct ureg_src addr = ureg_src(t->address[0]); 4176 src.Indirect = 1; 4177 src.IndirectFile = addr.File; 4178 src.IndirectIndex = addr.Index; 4179 src.IndirectSwizzle = addr.SwizzleX; 4180 4181 if (src_reg->file != PROGRAM_INPUT && 4182 src_reg->file != PROGRAM_OUTPUT) { 4183 /* If src_reg->index was negative, it was set to zero in 4184 * src_register(). Reassign it now. But don't do this 4185 * for input/output regs since they get remapped while 4186 * const buffers don't. 
4187 */ 4188 src.Index = src_reg->index; 4189 } 4190 } 4191 4192 return src; 4193} 4194 4195static struct tgsi_texture_offset 4196translate_tex_offset(struct st_translate *t, 4197 const struct tgsi_texture_offset *in_offset) 4198{ 4199 struct tgsi_texture_offset offset; 4200 4201 assert(in_offset->File == PROGRAM_IMMEDIATE); 4202 4203 offset.File = TGSI_FILE_IMMEDIATE; 4204 offset.Index = in_offset->Index; 4205 offset.SwizzleX = in_offset->SwizzleX; 4206 offset.SwizzleY = in_offset->SwizzleY; 4207 offset.SwizzleZ = in_offset->SwizzleZ; 4208 4209 return offset; 4210} 4211 4212static void 4213compile_tgsi_instruction(struct st_translate *t, 4214 const glsl_to_tgsi_instruction *inst) 4215{ 4216 struct ureg_program *ureg = t->ureg; 4217 GLuint i; 4218 struct ureg_dst dst[1]; 4219 struct ureg_src src[4]; 4220 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; 4221 4222 unsigned num_dst; 4223 unsigned num_src; 4224 4225 num_dst = num_inst_dst_regs(inst->op); 4226 num_src = num_inst_src_regs(inst->op); 4227 4228 if (num_dst) 4229 dst[0] = translate_dst(t, 4230 &inst->dst, 4231 inst->saturate); 4232 4233 for (i = 0; i < num_src; i++) 4234 src[i] = translate_src(t, &inst->src[i]); 4235 4236 switch(inst->op) { 4237 case TGSI_OPCODE_BGNLOOP: 4238 case TGSI_OPCODE_CAL: 4239 case TGSI_OPCODE_ELSE: 4240 case TGSI_OPCODE_ENDLOOP: 4241 case TGSI_OPCODE_IF: 4242 assert(num_dst == 0); 4243 ureg_label_insn(ureg, 4244 inst->op, 4245 src, num_src, 4246 get_label(t, 4247 inst->op == TGSI_OPCODE_CAL ? 
inst->function->sig_id : 0)); 4248 return; 4249 4250 case TGSI_OPCODE_TEX: 4251 case TGSI_OPCODE_TXB: 4252 case TGSI_OPCODE_TXD: 4253 case TGSI_OPCODE_TXL: 4254 case TGSI_OPCODE_TXP: 4255 case TGSI_OPCODE_TXQ: 4256 case TGSI_OPCODE_TXF: 4257 src[num_src++] = t->samplers[inst->sampler]; 4258 for (i = 0; i < inst->tex_offset_num_offset; i++) { 4259 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); 4260 } 4261 ureg_tex_insn(ureg, 4262 inst->op, 4263 dst, num_dst, 4264 translate_texture_target(inst->tex_target, inst->tex_shadow), 4265 texoffsets, inst->tex_offset_num_offset, 4266 src, num_src); 4267 return; 4268 4269 case TGSI_OPCODE_SCS: 4270 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); 4271 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); 4272 break; 4273 4274 default: 4275 ureg_insn(ureg, 4276 inst->op, 4277 dst, num_dst, 4278 src, num_src); 4279 break; 4280 } 4281} 4282 4283/** 4284 * Emit the TGSI instructions for inverting and adjusting WPOS. 4285 * This code is unavoidable because it also depends on whether 4286 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 4287 */ 4288static void 4289emit_wpos_adjustment( struct st_translate *t, 4290 const struct gl_program *program, 4291 boolean invert, 4292 GLfloat adjX, GLfloat adjY[2]) 4293{ 4294 struct ureg_program *ureg = t->ureg; 4295 4296 /* Fragment program uses fragment position input. 4297 * Need to replace instances of INPUT[WPOS] with temp T 4298 * where T = INPUT[WPOS] by y is inverted. 4299 */ 4300 static const gl_state_index wposTransformState[STATE_LENGTH] 4301 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 4302 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4303 4304 /* XXX: note we are modifying the incoming shader here! 
 Need to
    * do this before emitting the constant decls below, or this
    * will be missed:
    */
   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
                                                       wposTransformState);

   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];

   /* First, apply the coordinate shift: */
   if (adjX || adjY[0] || adjY[1]) {
      if (adjY[0] != adjY[1]) {
         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
          * depending on whether inversion is actually going to be applied
          * or not, which is determined by testing against the inversion
          * state variable used below, which will be either +1 or -1.
          */
         struct ureg_dst adj_temp = ureg_DECL_temporary(ureg);

         ureg_CMP(ureg, adj_temp,
                  ureg_scalar(wpostrans, invert ? 2 : 0),
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
      } else {
         ureg_ADD(ureg, wpos_temp, wpos_input,
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
      }
      /* The y flip below must operate on the shifted position. */
      wpos_input = ureg_src(wpos_temp);
   } else {
      /* MOV wpos_temp, input[wpos]
       *
       * The MAD below only writes the y channel, so the remaining channels
       * of wpos_temp are filled in here.
       */
      ureg_MOV( ureg, wpos_temp, wpos_input );
   }

   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
    * inversion/identity, or the other way around if we're drawing to an FBO.
    */
   if (invert) {
      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 0),
                ureg_scalar(wpostrans, 1));
   } else {
      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 2),
                ureg_scalar(wpostrans, 3));
   }

   /* Use wpos_temp as position input from here on:
    */
   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
}


/**
 * Emit fragment position/coordinate code.
 */
static void
emit_wpos(struct st_context *st,
          struct st_translate *t,
          const struct gl_program *program,
          struct ureg_program *ureg)
{
   const struct gl_fragment_program *fp =
      (const struct gl_fragment_program *) program;
   struct pipe_screen *pscreen = st->pipe->screen;
   GLfloat adjX = 0.0f;
   GLfloat adjY[2] = { 0.0f, 0.0f };
   boolean invert = FALSE;

   /* Query the pixel center conventions supported by the pipe driver and set
    * adjX, adjY to help out if it cannot handle the requested one internally.
    *
    * The bias of the y-coordinate depends on whether y-inversion takes place
    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
    * drawing to an FBO (causes additional inversion), and whether the pipe
    * driver origin and the requested origin differ (the latter condition is
    * stored in the 'invert' variable).
4392 * 4393 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): 4394 * 4395 * center shift only: 4396 * i -> h: +0.5 4397 * h -> i: -0.5 4398 * 4399 * inversion only: 4400 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 4401 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 4402 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 4403 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 4404 * 4405 * inversion and center shift: 4406 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 4407 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 4408 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 4409 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 4410 */ 4411 if (fp->OriginUpperLeft) { 4412 /* Fragment shader wants origin in upper-left */ 4413 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 4414 /* the driver supports upper-left origin */ 4415 } 4416 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 4417 /* the driver supports lower-left origin, need to invert Y */ 4418 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4419 invert = TRUE; 4420 } 4421 else 4422 assert(0); 4423 } 4424 else { 4425 /* Fragment shader wants origin in lower-left */ 4426 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 4427 /* the driver supports lower-left origin */ 4428 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4429 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 4430 /* the driver supports upper-left origin, need to invert Y */ 4431 invert = TRUE; 4432 else 4433 assert(0); 4434 } 4435 4436 if (fp->PixelCenterInteger) { 4437 /* Fragment shader wants pixel center integer */ 4438 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4439 /* the driver supports pixel center integer */ 4440 adjY[1] = 1.0f; 4441 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4442 } 4443 else if (pscreen->get_param(pscreen, 
PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4444 /* the driver supports pixel center half integer, need to bias X,Y */ 4445 adjX = -0.5f; 4446 adjY[0] = -0.5f; 4447 adjY[1] = 0.5f; 4448 } 4449 else 4450 assert(0); 4451 } 4452 else { 4453 /* Fragment shader wants pixel center half integer */ 4454 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4455 /* the driver supports pixel center half integer */ 4456 } 4457 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4458 /* the driver supports pixel center integer, need to bias X,Y */ 4459 adjX = adjY[0] = adjY[1] = 0.5f; 4460 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4461 } 4462 else 4463 assert(0); 4464 } 4465 4466 /* we invert after adjustment so that we avoid the MOV to temporary, 4467 * and reuse the adjustment ADD instead */ 4468 emit_wpos_adjustment(t, program, invert, adjX, adjY); 4469} 4470 4471/** 4472 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 4473 * TGSI uses +1 for front, -1 for back. 4474 * This function converts the TGSI value to the GL value. Simply clamping/ 4475 * saturating the value to [0,1] does the job. 
4476 */ 4477static void 4478emit_face_var(struct st_translate *t) 4479{ 4480 struct ureg_program *ureg = t->ureg; 4481 struct ureg_dst face_temp = ureg_DECL_temporary(ureg); 4482 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 4483 4484 /* MOV_SAT face_temp, input[face] */ 4485 face_temp = ureg_saturate(face_temp); 4486 ureg_MOV(ureg, face_temp, face_input); 4487 4488 /* Use face_temp as face input from here on: */ 4489 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 4490} 4491 4492static void 4493emit_edgeflags(struct st_translate *t) 4494{ 4495 struct ureg_program *ureg = t->ureg; 4496 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 4497 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 4498 4499 ureg_MOV(ureg, edge_dst, edge_src); 4500} 4501 4502/** 4503 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 4504 * \param program the program to translate 4505 * \param numInputs number of input registers used 4506 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 4507 * input indexes 4508 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 4509 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 4510 * each input 4511 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 4512 * \param numOutputs number of output registers used 4513 * \param outputMapping maps Mesa fragment program outputs to TGSI 4514 * generic outputs 4515 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 4516 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 4517 * each output 4518 * 4519 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 4520 */ 4521extern "C" enum pipe_error 4522st_translate_program( 4523 struct gl_context *ctx, 4524 uint procType, 4525 struct ureg_program *ureg, 4526 glsl_to_tgsi_visitor *program, 4527 const struct gl_program *proginfo, 4528 GLuint 
numInputs, 4529 const GLuint inputMapping[], 4530 const ubyte inputSemanticName[], 4531 const ubyte inputSemanticIndex[], 4532 const GLuint interpMode[], 4533 GLuint numOutputs, 4534 const GLuint outputMapping[], 4535 const ubyte outputSemanticName[], 4536 const ubyte outputSemanticIndex[], 4537 boolean passthrough_edgeflags) 4538{ 4539 struct st_translate *t; 4540 unsigned i; 4541 enum pipe_error ret = PIPE_OK; 4542 4543 assert(numInputs <= Elements(t->inputs)); 4544 assert(numOutputs <= Elements(t->outputs)); 4545 4546 t = CALLOC_STRUCT(st_translate); 4547 if (!t) { 4548 ret = PIPE_ERROR_OUT_OF_MEMORY; 4549 goto out; 4550 } 4551 4552 memset(t, 0, sizeof *t); 4553 4554 t->procType = procType; 4555 t->inputMapping = inputMapping; 4556 t->outputMapping = outputMapping; 4557 t->ureg = ureg; 4558 t->pointSizeOutIndex = -1; 4559 t->prevInstWrotePointSize = GL_FALSE; 4560 4561 /* 4562 * Declare input attributes. 4563 */ 4564 if (procType == TGSI_PROCESSOR_FRAGMENT) { 4565 for (i = 0; i < numInputs; i++) { 4566 t->inputs[i] = ureg_DECL_fs_input(ureg, 4567 inputSemanticName[i], 4568 inputSemanticIndex[i], 4569 interpMode[i]); 4570 } 4571 4572 if (proginfo->InputsRead & FRAG_BIT_WPOS) { 4573 /* Must do this after setting up t->inputs, and before 4574 * emitting constant references, below: 4575 */ 4576 emit_wpos(st_context(ctx), t, proginfo, ureg); 4577 } 4578 4579 if (proginfo->InputsRead & FRAG_BIT_FACE) 4580 emit_face_var(t); 4581 4582 /* 4583 * Declare output attributes. 
4584 */ 4585 for (i = 0; i < numOutputs; i++) { 4586 switch (outputSemanticName[i]) { 4587 case TGSI_SEMANTIC_POSITION: 4588 t->outputs[i] = ureg_DECL_output(ureg, 4589 TGSI_SEMANTIC_POSITION, /* Z/Depth */ 4590 outputSemanticIndex[i]); 4591 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); 4592 break; 4593 case TGSI_SEMANTIC_STENCIL: 4594 t->outputs[i] = ureg_DECL_output(ureg, 4595 TGSI_SEMANTIC_STENCIL, /* Stencil */ 4596 outputSemanticIndex[i]); 4597 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); 4598 break; 4599 case TGSI_SEMANTIC_COLOR: 4600 t->outputs[i] = ureg_DECL_output(ureg, 4601 TGSI_SEMANTIC_COLOR, 4602 outputSemanticIndex[i]); 4603 break; 4604 default: 4605 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); 4606 ret = PIPE_ERROR_BAD_INPUT; 4607 goto out; 4608 } 4609 } 4610 } 4611 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 4612 for (i = 0; i < numInputs; i++) { 4613 t->inputs[i] = ureg_DECL_gs_input(ureg, 4614 i, 4615 inputSemanticName[i], 4616 inputSemanticIndex[i]); 4617 } 4618 4619 for (i = 0; i < numOutputs; i++) { 4620 t->outputs[i] = ureg_DECL_output(ureg, 4621 outputSemanticName[i], 4622 outputSemanticIndex[i]); 4623 } 4624 } 4625 else { 4626 assert(procType == TGSI_PROCESSOR_VERTEX); 4627 4628 for (i = 0; i < numInputs; i++) { 4629 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 4630 } 4631 4632 for (i = 0; i < numOutputs; i++) { 4633 t->outputs[i] = ureg_DECL_output(ureg, 4634 outputSemanticName[i], 4635 outputSemanticIndex[i]); 4636 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { 4637 /* Writing to the point size result register requires special 4638 * handling to implement clamping. 4639 */ 4640 static const gl_state_index pointSizeClampState[STATE_LENGTH] 4641 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4642 /* XXX: note we are modifying the incoming shader here! 
Need to 4643 * do this before emitting the constant decls below, or this 4644 * will be missed. 4645 */ 4646 unsigned pointSizeClampConst = 4647 _mesa_add_state_reference(proginfo->Parameters, 4648 pointSizeClampState); 4649 struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); 4650 t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); 4651 t->pointSizeResult = t->outputs[i]; 4652 t->pointSizeOutIndex = i; 4653 t->outputs[i] = psizregtemp; 4654 } 4655 } 4656 if (passthrough_edgeflags) 4657 emit_edgeflags(t); 4658 } 4659 4660 /* Declare address register. 4661 */ 4662 if (program->num_address_regs > 0) { 4663 assert(program->num_address_regs == 1); 4664 t->address[0] = ureg_DECL_address(ureg); 4665 } 4666 4667 /* Declare misc input registers 4668 */ 4669 { 4670 GLbitfield sysInputs = proginfo->SystemValuesRead; 4671 unsigned numSys = 0; 4672 for (i = 0; sysInputs; i++) { 4673 if (sysInputs & (1 << i)) { 4674 unsigned semName = mesa_sysval_to_semantic[i]; 4675 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); 4676 numSys++; 4677 sysInputs &= ~(1 << i); 4678 } 4679 } 4680 } 4681 4682 if (program->indirect_addr_temps) { 4683 /* If temps are accessed with indirect addressing, declare temporaries 4684 * in sequential order. Else, we declare them on demand elsewhere. 4685 * (Note: the number of temporaries is equal to program->next_temp) 4686 */ 4687 for (i = 0; i < (unsigned)program->next_temp; i++) { 4688 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 4689 t->temps[i] = ureg_DECL_temporary(t->ureg); 4690 } 4691 } 4692 4693 /* Emit constants and uniforms. TGSI uses a single index space for these, 4694 * so we put all the translated regs in t->constants. 
4695 */ 4696 if (proginfo->Parameters) { 4697 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); 4698 if (t->constants == NULL) { 4699 ret = PIPE_ERROR_OUT_OF_MEMORY; 4700 goto out; 4701 } 4702 4703 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 4704 switch (proginfo->Parameters->Parameters[i].Type) { 4705 case PROGRAM_ENV_PARAM: 4706 case PROGRAM_LOCAL_PARAM: 4707 case PROGRAM_STATE_VAR: 4708 case PROGRAM_NAMED_PARAM: 4709 case PROGRAM_UNIFORM: 4710 t->constants[i] = ureg_DECL_constant(ureg, i); 4711 break; 4712 4713 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect 4714 * addressing of the const buffer. 4715 * FIXME: Be smarter and recognize param arrays: 4716 * indirect addressing is only valid within the referenced 4717 * array. 4718 */ 4719 case PROGRAM_CONSTANT: 4720 if (program->indirect_addr_consts) 4721 t->constants[i] = ureg_DECL_constant(ureg, i); 4722 else 4723 t->constants[i] = emit_immediate(t, 4724 proginfo->Parameters->ParameterValues[i], 4725 proginfo->Parameters->Parameters[i].DataType, 4726 4); 4727 break; 4728 default: 4729 break; 4730 } 4731 } 4732 } 4733 4734 /* Emit immediate values. 
4735 */ 4736 t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); 4737 if (t->immediates == NULL) { 4738 ret = PIPE_ERROR_OUT_OF_MEMORY; 4739 goto out; 4740 } 4741 i = 0; 4742 foreach_iter(exec_list_iterator, iter, program->immediates) { 4743 immediate_storage *imm = (immediate_storage *)iter.get(); 4744 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); 4745 } 4746 4747 /* texture samplers */ 4748 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 4749 if (program->samplers_used & (1 << i)) { 4750 t->samplers[i] = ureg_DECL_sampler(ureg, i); 4751 } 4752 } 4753 4754 /* Emit each instruction in turn: 4755 */ 4756 foreach_iter(exec_list_iterator, iter, program->instructions) { 4757 set_insn_start(t, ureg_get_instruction_number(ureg)); 4758 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); 4759 4760 if (t->prevInstWrotePointSize && proginfo->Id) { 4761 /* The previous instruction wrote to the (fake) vertex point size 4762 * result register. Now we need to clamp that value to the min/max 4763 * point size range, putting the result into the real point size 4764 * register. 4765 * Note that we can't do this easily at the end of program due to 4766 * possible early return. 
4767 */ 4768 set_insn_start(t, ureg_get_instruction_number(ureg)); 4769 ureg_MAX(t->ureg, 4770 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), 4771 ureg_src(t->outputs[t->pointSizeOutIndex]), 4772 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 4773 ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), 4774 ureg_src(t->outputs[t->pointSizeOutIndex]), 4775 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 4776 } 4777 t->prevInstWrotePointSize = GL_FALSE; 4778 } 4779 4780 /* Fix up all emitted labels: 4781 */ 4782 for (i = 0; i < t->labels_count; i++) { 4783 ureg_fixup_label(ureg, t->labels[i].token, 4784 t->insn[t->labels[i].branch_target]); 4785 } 4786 4787out: 4788 if (t) { 4789 FREE(t->insn); 4790 FREE(t->labels); 4791 FREE(t->constants); 4792 FREE(t->immediates); 4793 4794 if (t->error) { 4795 debug_printf("%s: translate error flag set\n", __FUNCTION__); 4796 } 4797 4798 FREE(t); 4799 } 4800 4801 return ret; 4802} 4803/* ----------------------------- End TGSI code ------------------------------ */ 4804 4805/** 4806 * Convert a shader's GLSL IR into a Mesa gl_program, although without 4807 * generating Mesa IR. 
4808 */ 4809static struct gl_program * 4810get_mesa_program(struct gl_context *ctx, 4811 struct gl_shader_program *shader_program, 4812 struct gl_shader *shader) 4813{ 4814 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); 4815 struct gl_program *prog; 4816 struct pipe_screen * screen = st_context(ctx)->pipe->screen; 4817 unsigned pipe_shader_type; 4818 GLenum target; 4819 const char *target_string; 4820 bool progress; 4821 struct gl_shader_compiler_options *options = 4822 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; 4823 4824 switch (shader->Type) { 4825 case GL_VERTEX_SHADER: 4826 target = GL_VERTEX_PROGRAM_ARB; 4827 target_string = "vertex"; 4828 pipe_shader_type = PIPE_SHADER_VERTEX; 4829 break; 4830 case GL_FRAGMENT_SHADER: 4831 target = GL_FRAGMENT_PROGRAM_ARB; 4832 target_string = "fragment"; 4833 pipe_shader_type = PIPE_SHADER_FRAGMENT; 4834 break; 4835 case GL_GEOMETRY_SHADER: 4836 target = GL_GEOMETRY_PROGRAM_NV; 4837 target_string = "geometry"; 4838 pipe_shader_type = PIPE_SHADER_GEOMETRY; 4839 break; 4840 default: 4841 assert(!"should not be reached"); 4842 return NULL; 4843 } 4844 4845 validate_ir_tree(shader->ir); 4846 4847 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); 4848 if (!prog) 4849 return NULL; 4850 prog->Parameters = _mesa_new_parameter_list(); 4851 v->ctx = ctx; 4852 v->prog = prog; 4853 v->shader_program = shader_program; 4854 v->options = options; 4855 v->glsl_version = ctx->Const.GLSLVersion; 4856 v->native_integers = ctx->Const.NativeIntegers; 4857 4858 _mesa_generate_parameters_list_for_uniforms(shader_program, shader, 4859 prog->Parameters); 4860 4861 /* Emit intermediate IR for main(). */ 4862 visit_exec_list(shader->ir, v); 4863 4864 /* Now emit bodies for any functions that were used. 
*/ 4865 do { 4866 progress = GL_FALSE; 4867 4868 foreach_iter(exec_list_iterator, iter, v->function_signatures) { 4869 function_entry *entry = (function_entry *)iter.get(); 4870 4871 if (!entry->bgn_inst) { 4872 v->current_function = entry; 4873 4874 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); 4875 entry->bgn_inst->function = entry; 4876 4877 visit_exec_list(&entry->sig->body, v); 4878 4879 glsl_to_tgsi_instruction *last; 4880 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); 4881 if (last->op != TGSI_OPCODE_RET) 4882 v->emit(NULL, TGSI_OPCODE_RET); 4883 4884 glsl_to_tgsi_instruction *end; 4885 end = v->emit(NULL, TGSI_OPCODE_ENDSUB); 4886 end->function = entry; 4887 4888 progress = GL_TRUE; 4889 } 4890 } 4891 } while (progress); 4892 4893#if 0 4894 /* Print out some information (for debugging purposes) used by the 4895 * optimization passes. */ 4896 for (i=0; i < v->next_temp; i++) { 4897 int fr = v->get_first_temp_read(i); 4898 int fw = v->get_first_temp_write(i); 4899 int lr = v->get_last_temp_read(i); 4900 int lw = v->get_last_temp_write(i); 4901 4902 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); 4903 assert(fw <= fr); 4904 } 4905#endif 4906 4907 if (!screen->get_shader_param(screen, pipe_shader_type, 4908 PIPE_SHADER_CAP_OUTPUT_READ)) { 4909 /* Remove reads to output registers, and to varyings in vertex shaders. */ 4910 v->remove_output_reads(PROGRAM_OUTPUT); 4911 if (target == GL_VERTEX_PROGRAM_ARB) 4912 v->remove_output_reads(PROGRAM_VARYING); 4913 } 4914 4915 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ 4916 v->simplify_cmp(); 4917 v->copy_propagate(); 4918 while (v->eliminate_dead_code_advanced()); 4919 4920 /* FIXME: These passes to optimize temporary registers don't work when there 4921 * is indirect addressing of the temporary register space. We need proper 4922 * array support so that we don't have to give up these passes in every 4923 * shader that uses arrays. 
4924 */ 4925 if (!v->indirect_addr_temps) { 4926 v->eliminate_dead_code(); 4927 v->merge_registers(); 4928 v->renumber_registers(); 4929 } 4930 4931 /* Write the END instruction. */ 4932 v->emit(NULL, TGSI_OPCODE_END); 4933 4934 if (ctx->Shader.Flags & GLSL_DUMP) { 4935 printf("\n"); 4936 printf("GLSL IR for linked %s program %d:\n", target_string, 4937 shader_program->Name); 4938 _mesa_print_ir(shader->ir, NULL); 4939 printf("\n"); 4940 printf("\n"); 4941 fflush(stdout); 4942 } 4943 4944 prog->Instructions = NULL; 4945 prog->NumInstructions = 0; 4946 4947 do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER); 4948 count_resources(v, prog); 4949 4950 _mesa_reference_program(ctx, &shader->Program, prog); 4951 4952 /* This has to be done last. Any operation the can cause 4953 * prog->ParameterValues to get reallocated (e.g., anything that adds a 4954 * program constant) has to happen before creating this linkage. 4955 */ 4956 _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); 4957 if (!shader_program->LinkStatus) { 4958 return NULL; 4959 } 4960 4961 struct st_vertex_program *stvp; 4962 struct st_fragment_program *stfp; 4963 struct st_geometry_program *stgp; 4964 4965 switch (shader->Type) { 4966 case GL_VERTEX_SHADER: 4967 stvp = (struct st_vertex_program *)prog; 4968 stvp->glsl_to_tgsi = v; 4969 break; 4970 case GL_FRAGMENT_SHADER: 4971 stfp = (struct st_fragment_program *)prog; 4972 stfp->glsl_to_tgsi = v; 4973 break; 4974 case GL_GEOMETRY_SHADER: 4975 stgp = (struct st_geometry_program *)prog; 4976 stgp->glsl_to_tgsi = v; 4977 break; 4978 default: 4979 assert(!"should not be reached"); 4980 return NULL; 4981 } 4982 4983 return prog; 4984} 4985 4986extern "C" { 4987 4988struct gl_shader * 4989st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 4990{ 4991 struct gl_shader *shader; 4992 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || 4993 type == GL_GEOMETRY_SHADER_ARB); 4994 shader = 
rzalloc(NULL, struct gl_shader); 4995 if (shader) { 4996 shader->Type = type; 4997 shader->Name = name; 4998 _mesa_init_shader(ctx, shader); 4999 } 5000 return shader; 5001} 5002 5003struct gl_shader_program * 5004st_new_shader_program(struct gl_context *ctx, GLuint name) 5005{ 5006 struct gl_shader_program *shProg; 5007 shProg = rzalloc(NULL, struct gl_shader_program); 5008 if (shProg) { 5009 shProg->Name = name; 5010 _mesa_init_shader_program(ctx, shProg); 5011 } 5012 return shProg; 5013} 5014 5015/** 5016 * Link a shader. 5017 * Called via ctx->Driver.LinkShader() 5018 * This actually involves converting GLSL IR into an intermediate TGSI-like IR 5019 * with code lowering and other optimizations. 5020 */ 5021GLboolean 5022st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 5023{ 5024 assert(prog->LinkStatus); 5025 5026 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 5027 if (prog->_LinkedShaders[i] == NULL) 5028 continue; 5029 5030 bool progress; 5031 exec_list *ir = prog->_LinkedShaders[i]->ir; 5032 const struct gl_shader_compiler_options *options = 5033 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; 5034 5035 do { 5036 progress = false; 5037 5038 /* Lowering */ 5039 do_mat_op_to_vec(ir); 5040 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 5041 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP 5042 | ((options->EmitNoPow) ? 
POW_TO_EXP2 : 0))); 5043 5044 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; 5045 5046 progress = do_common_optimization(ir, true, true, 5047 options->MaxUnrollIterations) 5048 || progress; 5049 5050 progress = lower_quadop_vector(ir, false) || progress; 5051 5052 if (options->MaxIfDepth == 0) 5053 progress = lower_discard(ir) || progress; 5054 5055 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; 5056 5057 if (options->EmitNoNoise) 5058 progress = lower_noise(ir) || progress; 5059 5060 /* If there are forms of indirect addressing that the driver 5061 * cannot handle, perform the lowering pass. 5062 */ 5063 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 5064 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 5065 progress = 5066 lower_variable_index_to_cond_assign(ir, 5067 options->EmitNoIndirectInput, 5068 options->EmitNoIndirectOutput, 5069 options->EmitNoIndirectTemp, 5070 options->EmitNoIndirectUniform) 5071 || progress; 5072 5073 progress = do_vec_index_to_cond_assign(ir) || progress; 5074 } while (progress); 5075 5076 validate_ir_tree(ir); 5077 } 5078 5079 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 5080 struct gl_program *linked_prog; 5081 5082 if (prog->_LinkedShaders[i] == NULL) 5083 continue; 5084 5085 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); 5086 5087 if (linked_prog) { 5088 static const GLenum targets[] = { 5089 GL_VERTEX_PROGRAM_ARB, 5090 GL_FRAGMENT_PROGRAM_ARB, 5091 GL_GEOMETRY_PROGRAM_NV 5092 }; 5093 5094 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 5095 linked_prog); 5096 if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) { 5097 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 5098 NULL); 5099 _mesa_reference_program(ctx, &linked_prog, NULL); 5100 return GL_FALSE; 5101 } 5102 } 5103 5104 _mesa_reference_program(ctx, &linked_prog, 
NULL); 5105 } 5106 5107 return GL_TRUE; 5108} 5109 5110void 5111st_translate_stream_output_info(struct glsl_to_tgsi_visitor *glsl_to_tgsi, 5112 const GLuint outputMapping[], 5113 struct pipe_stream_output_info *so) 5114{ 5115 static unsigned comps_to_mask[] = { 5116 0, 5117 TGSI_WRITEMASK_X, 5118 TGSI_WRITEMASK_XY, 5119 TGSI_WRITEMASK_XYZ, 5120 TGSI_WRITEMASK_XYZW 5121 }; 5122 unsigned i; 5123 struct gl_transform_feedback_info *info = 5124 &glsl_to_tgsi->shader_program->LinkedTransformFeedback; 5125 5126 for (i = 0; i < info->NumOutputs; i++) { 5127 assert(info->Outputs[i].NumComponents < Elements(comps_to_mask)); 5128 so->output[i].register_index = 5129 outputMapping[info->Outputs[i].OutputRegister]; 5130 so->output[i].register_mask = 5131 comps_to_mask[info->Outputs[i].NumComponents]; 5132 so->output[i].output_buffer = info->Outputs[i].OutputBuffer; 5133 } 5134 so->num_outputs = info->NumOutputs; 5135} 5136 5137} /* extern "C" */ 5138