/* st_glsl_to_tgsi.cpp — snapshot at revision a8ed00d5f13d7b016bc2ea56f130adc3fa857cc8 */
1/* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright © 2010 Intel Corporation 5 * Copyright © 2011 Bryan Cain 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 */ 26 27/** 28 * \file glsl_to_tgsi.cpp 29 * 30 * Translate GLSL IR to TGSI. 
31 */ 32 33#include <stdio.h> 34#include "main/compiler.h" 35#include "ir.h" 36#include "ir_visitor.h" 37#include "ir_print_visitor.h" 38#include "ir_expression_flattening.h" 39#include "glsl_types.h" 40#include "glsl_parser_extras.h" 41#include "../glsl/program.h" 42#include "ir_optimization.h" 43#include "ast.h" 44 45#include "main/mtypes.h" 46#include "main/shaderobj.h" 47#include "program/hash_table.h" 48 49extern "C" { 50#include "main/shaderapi.h" 51#include "main/uniforms.h" 52#include "program/prog_instruction.h" 53#include "program/prog_optimize.h" 54#include "program/prog_print.h" 55#include "program/program.h" 56#include "program/prog_parameter.h" 57#include "program/sampler.h" 58 59#include "pipe/p_compiler.h" 60#include "pipe/p_context.h" 61#include "pipe/p_screen.h" 62#include "pipe/p_shader_tokens.h" 63#include "pipe/p_state.h" 64#include "util/u_math.h" 65#include "tgsi/tgsi_ureg.h" 66#include "tgsi/tgsi_info.h" 67#include "st_context.h" 68#include "st_program.h" 69#include "st_glsl_to_tgsi.h" 70#include "st_mesa_to_tgsi.h" 71} 72 73#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX 74#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 75 (1 << PROGRAM_ENV_PARAM) | \ 76 (1 << PROGRAM_STATE_VAR) | \ 77 (1 << PROGRAM_NAMED_PARAM) | \ 78 (1 << PROGRAM_CONSTANT) | \ 79 (1 << PROGRAM_UNIFORM)) 80 81/** 82 * Maximum number of temporary registers. 83 * 84 * It is too big for stack allocated arrays -- it will cause stack overflow on 85 * Windows and likely Mac OS X. 86 */ 87#define MAX_TEMPS 4096 88 89/* will be 4 for GLSL 4.00 */ 90#define MAX_GLSL_TEXTURE_OFFSET 1 91 92class st_src_reg; 93class st_dst_reg; 94 95static int swizzle_for_size(int size); 96 97/** 98 * This struct is a corresponding struct to TGSI ureg_src. 
 */
class st_src_reg {
public:
   /* Build a source register whose swizzle is derived from the GLSL type:
    * scalars/vectors/matrices get a size-based swizzle, everything else
    * (arrays, structs, or a NULL type) gets the identity XYZW swizzle.
    */
   st_src_reg(gl_register_file file, int index, const glsl_type *type)
   {
      this->file = file;
      this->index = index;
      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
         this->swizzle = swizzle_for_size(type->vector_elements);
      else
         this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
      this->reladdr = NULL;
   }

   /* Build a source register with an explicit GLSL_TYPE_* base type and
    * the identity swizzle.
    */
   st_src_reg(gl_register_file file, int index, int type)
   {
      this->type = type;
      this->file = file;
      this->index = index;
      this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->reladdr = NULL;
   }

   /* Default: an undefined register (PROGRAM_UNDEFINED / GLSL_TYPE_ERROR). */
   st_src_reg()
   {
      this->type = GLSL_TYPE_ERROR;
      this->file = PROGRAM_UNDEFINED;
      this->index = 0;
      this->swizzle = 0;
      this->negate = 0;
      this->reladdr = NULL;
   }

   /* Convert a destination register into a source reference to the same
    * storage (defined after st_dst_reg below).
    */
   explicit st_src_reg(st_dst_reg reg);

   gl_register_file file; /**< PROGRAM_* from Mesa */
   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
   int negate; /**< NEGATE_XYZW mask from mesa */
   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
   /** Register index should be offset by the integer in this reg. */
   st_src_reg *reladdr;
};

/**
 * Destination-operand counterpart of st_src_reg (mirrors TGSI ureg_dst):
 * a writemask instead of a swizzle, plus a condition-code mask.
 */
class st_dst_reg {
public:
   st_dst_reg(gl_register_file file, int writemask, int type)
   {
      this->file = file;
      this->index = 0;
      this->writemask = writemask;
      this->cond_mask = COND_TR;
      this->reladdr = NULL;
      this->type = type;
   }

   /* Default: an undefined destination with an empty writemask. */
   st_dst_reg()
   {
      this->type = GLSL_TYPE_ERROR;
      this->file = PROGRAM_UNDEFINED;
      this->index = 0;
      this->writemask = 0;
      this->cond_mask = COND_TR;
      this->reladdr = NULL;
   }

   /* Convert a source register into a destination over the same storage. */
   explicit st_dst_reg(st_src_reg reg);

   gl_register_file file; /**< PROGRAM_* from Mesa */
   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
   GLuint cond_mask:4;
   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
   /** Register index should be offset by the integer in this reg. */
   st_src_reg *reladdr;
};

/* dst -> src conversion: same storage, read with the identity swizzle. */
st_src_reg::st_src_reg(st_dst_reg reg)
{
   this->type = reg.type;
   this->file = reg.file;
   this->index = reg.index;
   this->swizzle = SWIZZLE_XYZW;
   this->negate = 0;
   this->reladdr = reg.reladdr;
}

/* src -> dst conversion: same storage, written with the full writemask. */
st_dst_reg::st_dst_reg(st_src_reg reg)
{
   this->type = reg.type;
   this->file = reg.file;
   this->index = reg.index;
   this->writemask = WRITEMASK_XYZW;
   this->cond_mask = COND_TR;
   this->reladdr = reg.reladdr;
}

class glsl_to_tgsi_instruction : public exec_node {
public:
   /* Callers of this ralloc-based new need not call delete. It's
    * easier to just ralloc_free 'ctx' (or any of its ancestors).
    */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = rzalloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   unsigned op;
   st_dst_reg dst;
   st_src_reg src[3];
   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;
   GLboolean cond_update;
   bool saturate;
   int sampler; /**< sampler index */
   int tex_target; /**< One of TEXTURE_*_INDEX */
   GLboolean tex_shadow;
   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
   unsigned tex_offset_num_offset;
   int dead_mask; /**< Used in dead code elimination */

   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
};

/**
 * Maps an ir_variable to the register file/index that holds its value.
 */
class variable_storage : public exec_node {
public:
   variable_storage(ir_variable *var, gl_register_file file, int index)
      : file(file), index(index), var(var)
   {
      /* empty */
   }

   gl_register_file file;
   int index;
   ir_variable *var; /* variable that maps to this, if any */
};

/**
 * One entry of the visitor's immediate-constant pool; deduplicated by
 * add_constant() before being emitted as TGSI immediates.
 */
class immediate_storage : public exec_node {
public:
   immediate_storage(gl_constant_value *values, int size, int type)
   {
      memcpy(this->values, values, size * sizeof(gl_constant_value));
      this->size = size;
      this->type = type;
   }

   gl_constant_value values[4];
   int size; /**< Number of components (1-4) */
   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
};

/**
 * Book-keeping for one GLSL function signature emitted as a TGSI subroutine.
 */
class function_entry : public exec_node {
public:
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that TGSI instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   glsl_to_tgsi_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual TGSI.
    *
    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
    */
   int inst;

   /** Storage for the return value. */
   st_src_reg return_reg;
};

/**
 * The translator itself: walks the GLSL IR of one shader and builds a list
 * of glsl_to_tgsi_instruction, then runs the optimization passes declared
 * below before final TGSI emission.
 */
class glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_shader_compiler_options *options;

   int next_temp;

   int num_address_regs;
   int samplers_used;
   bool indirect_addr_temps;
   bool indirect_addr_consts;

   int glsl_version;
   bool native_integers;

   variable_storage *find_variable_storage(ir_variable *var);

   int add_constant(gl_register_file file, gl_constant_value values[4],
                    int size, int datatype, GLuint *swizzle_out);

   function_entry *get_function_signature(ir_function_signature *sig);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);

   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_type(int type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   /*@}*/

   st_src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of immediate_storage */
   exec_list immediates;
   unsigned num_immediates;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);

   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst, st_src_reg src0);

   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst,
                                  st_src_reg src0, st_src_reg src1, st_src_reg src2);

   unsigned get_opcode(ir_instruction *ir, unsigned op,
                       st_dst_reg dst,
                       st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
                                     st_dst_reg dst,
                                     st_src_reg src0,
                                     st_src_reg src1,
                                     unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   void emit_scs(ir_instruction *ir, unsigned op,
                 st_dst_reg dst, const st_src_reg &src);

   bool try_emit_mad(ir_expression *ir,
                     int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
                                 int mul_operand);
   bool try_emit_sat(ir_expression *ir);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void simplify_cmp(void);

   void rename_temp_register(int index, int new_index);
   int get_first_temp_read(int index);
   int get_first_temp_write(int index);
   int get_last_temp_read(int index);
   int get_last_temp_write(int index);

   void copy_propagate(void);
   void eliminate_dead_code(void);
   int eliminate_dead_code_advanced(void);
   void merge_registers(void);
   void renumber_registers(void);

   void *mem_ctx;
};

/* Shared "no operand" placeholders used by the emit() wrappers below. */
static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);

static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);

/* The single ARL-loaded address register used for all relative addressing. */
static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);

static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);

/**
 * Append a printf-formatted message to the program's InfoLog and mark the
 * link as failed.
 */
static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
   va_end(args);

   prog->LinkStatus = GL_FALSE;
}

/**
 * Return the identity swizzle truncated to \p size components; the unused
 * trailing channels repeat the last valid component.
 */
static int
swizzle_for_size(int size)
{
   int size_swizzles[4] = {
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
   };

   assert((size >= 1) && (size <= 4));
   return size_swizzles[size - 1];
}

/** Whether \p opcode is a texture instruction, per the TGSI opcode table. */
static bool
is_tex_instruction(unsigned opcode)
{
   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
   return info->is_tex;
}

/** Number of destination registers \p opcode writes. */
static unsigned
num_inst_dst_regs(unsigned opcode)
{
   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
   return info->num_dst;
}

/**
 * Number of non-sampler source registers \p opcode reads (the sampler
 * operand of texture instructions is not counted).
 */
static unsigned
num_inst_src_regs(unsigned opcode)
{
   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
   return info->is_tex ? info->num_src - 1 : info->num_src;
}

/**
 * Core instruction factory: resolves the type-specific opcode, materializes
 * relative addressing, appends the new instruction to this->instructions,
 * and records indirect-addressing usage for later TGSI emission.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
                           st_dst_reg dst,
                           st_src_reg src0, st_src_reg src1, st_src_reg src2)
{
   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
   int num_reladdr = 0, i;

   op = get_opcode(ir, op, dst, src0, src1);

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL;
   num_reladdr += src0.reladdr != NULL;
   num_reladdr += src1.reladdr != NULL;
   num_reladdr += src2.reladdr != NULL;

   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr) {
      emit_arl(ir, address_reg, *dst.reladdr);
      num_reladdr--;
   }
   assert(num_reladdr == 0);

   inst->op = op;
   inst->dst = dst;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->ir = ir;
   inst->dead_mask = 0;

   inst->function = NULL;

   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
      this->num_address_regs = 1;

   /* Update indirect addressing status used by TGSI */
   if (dst.reladdr) {
      switch(dst.file) {
      case PROGRAM_TEMPORARY:
         this->indirect_addr_temps = true;
         break;
      case PROGRAM_LOCAL_PARAM:
      case PROGRAM_ENV_PARAM:
      case PROGRAM_STATE_VAR:
      case PROGRAM_NAMED_PARAM:
      case PROGRAM_CONSTANT:
      case PROGRAM_UNIFORM:
         this->indirect_addr_consts = true;
         break;
      case PROGRAM_IMMEDIATE:
         assert(!"immediates should not have indirect addressing");
         break;
      default:
         break;
      }
   }
   else {
      for (i=0; i<3; i++) {
         if(inst->src[i].reladdr) {
            switch(inst->src[i].file) {
            case PROGRAM_TEMPORARY:
               this->indirect_addr_temps = true;
               break;
            case PROGRAM_LOCAL_PARAM:
            case PROGRAM_ENV_PARAM:
            case PROGRAM_STATE_VAR:
            case PROGRAM_NAMED_PARAM:
            case PROGRAM_CONSTANT:
            case PROGRAM_UNIFORM:
               this->indirect_addr_consts = true;
               break;
            case PROGRAM_IMMEDIATE:
               assert(!"immediates should not have indirect addressing");
               break;
            default:
               break;
            }
         }
      }
   }

   this->instructions.push_tail(inst);

   if (native_integers)
      try_emit_float_set(ir, op, dst);

   return inst;
}


/** Two-source convenience wrapper around the full emit(). */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
                           st_dst_reg dst, st_src_reg src0, st_src_reg src1)
{
   return emit(ir, op, dst, src0, src1, undef_src);
}

/** One-source convenience wrapper; the destination must write something. */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
                           st_dst_reg dst, st_src_reg src0)
{
   assert(dst.writemask != 0);
   return emit(ir, op, dst, src0, undef_src, undef_src);
}

/** Operand-less wrapper (BGNLOOP, BRK, etc.). */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
{
   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
}

/**
 * Emits the code to convert the result of float SET instructions to integers.
 */
void
glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
                                         st_dst_reg dst)
{
   if ((op == TGSI_OPCODE_SEQ ||
        op == TGSI_OPCODE_SNE ||
        op == TGSI_OPCODE_SGE ||
        op == TGSI_OPCODE_SLT))
   {
      /* Negating the float 1.0/0.0 and converting with F2I yields the
       * ~0/0 integer-boolean encoding. */
      st_src_reg src = st_src_reg(dst);
      src.negate = ~src.negate;
      dst.type = GLSL_TYPE_FLOAT;
      emit(ir, TGSI_OPCODE_F2I, dst, src);
   }
}

/**
 * Determines whether to use an integer, unsigned integer, or float opcode
 * based on the operands and input opcode, then emits the result.
 */
unsigned
glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
                                 st_dst_reg dst,
                                 st_src_reg src0, st_src_reg src1)
{
   int type = GLSL_TYPE_FLOAT;

   if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
      type = GLSL_TYPE_FLOAT;
   else if (native_integers)
      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;

/* case4: map a float opcode to its int/uint variants; LAST marks "invalid
 * for this type" and trips the assert below. */
#define case4(c, f, i, u) \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
      else op = TGSI_OPCODE_##f; \
      break;
#define case3(f, i, u)  case4(f, f, i, u)
#define case2fi(f, i)   case4(f, f, i, i)
#define case2iu(i, u)   case4(i, LAST, i, u)

   switch(op) {
      case2fi(ADD, UADD);
      case2fi(MUL, UMUL);
      case2fi(MAD, UMAD);
      case3(DIV, IDIV, UDIV);
      case3(MAX, IMAX, UMAX);
      case3(MIN, IMIN, UMIN);
      case2iu(MOD, UMOD);

      case2fi(SEQ, USEQ);
      case2fi(SNE, USNE);
      case3(SGE, ISGE, USGE);
      case3(SLT, ISLT, USLT);

      case2iu(ISHR, USHR);

      case2fi(SSG, ISSG);
      case3(ABS, IABS, IABS);

      default: break;
   }

   assert(op != TGSI_OPCODE_LAST);
   return op;
}

glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
                              st_dst_reg dst, st_src_reg src0, st_src_reg src1,
                              unsigned elements)
{
   /* elements is 2..4, indexing DP2/DP3/DP4. */
   static const unsigned dot_opcodes[] = {
      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
   };

   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
}

/**
 * Emits TGSI scalar opcodes to produce unique answers across channels.
 *
 * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
 * channel determines the result across all channels.  So to do a vec4
 * of this operation, we want to emit a scalar per source channel used
 * to produce dest channels.
 */
void
glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst,
                                  st_src_reg orig_src0, st_src_reg orig_src1)
{
   int i, j;
   int done_mask = ~dst.writemask;

   /* TGSI RCP is a scalar operation splatting results to all channels,
    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
    * dst channels.
708 */ 709 for (i = 0; i < 4; i++) { 710 GLuint this_mask = (1 << i); 711 glsl_to_tgsi_instruction *inst; 712 st_src_reg src0 = orig_src0; 713 st_src_reg src1 = orig_src1; 714 715 if (done_mask & this_mask) 716 continue; 717 718 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 719 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 720 for (j = i + 1; j < 4; j++) { 721 /* If there is another enabled component in the destination that is 722 * derived from the same inputs, generate its value on this pass as 723 * well. 724 */ 725 if (!(done_mask & (1 << j)) && 726 GET_SWZ(src0.swizzle, j) == src0_swiz && 727 GET_SWZ(src1.swizzle, j) == src1_swiz) { 728 this_mask |= (1 << j); 729 } 730 } 731 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 732 src0_swiz, src0_swiz); 733 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 734 src1_swiz, src1_swiz); 735 736 inst = emit(ir, op, dst, src0, src1); 737 inst->dst.writemask = this_mask; 738 done_mask |= this_mask; 739 } 740} 741 742void 743glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 744 st_dst_reg dst, st_src_reg src0) 745{ 746 st_src_reg undef = undef_src; 747 748 undef.swizzle = SWIZZLE_XXXX; 749 750 emit_scalar(ir, op, dst, src0, undef); 751} 752 753void 754glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 755 st_dst_reg dst, st_src_reg src0) 756{ 757 int op = TGSI_OPCODE_ARL; 758 759 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) 760 op = TGSI_OPCODE_UARL; 761 762 emit(NULL, op, dst, src0); 763} 764 765/** 766 * Emit an TGSI_OPCODE_SCS instruction 767 * 768 * The \c SCS opcode functions a bit differently than the other TGSI opcodes. 769 * Instead of splatting its result across all four components of the 770 * destination, it writes one value to the \c x component and another value to 771 * the \c y component. 772 * 773 * \param ir IR instruction being processed 774 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 775 * on which value is desired. 
776 * \param dst Destination register 777 * \param src Source register 778 */ 779void 780glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, 781 st_dst_reg dst, 782 const st_src_reg &src) 783{ 784 /* Vertex programs cannot use the SCS opcode. 785 */ 786 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { 787 emit_scalar(ir, op, dst, src); 788 return; 789 } 790 791 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; 792 const unsigned scs_mask = (1U << component); 793 int done_mask = ~dst.writemask; 794 st_src_reg tmp; 795 796 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); 797 798 /* If there are compnents in the destination that differ from the component 799 * that will be written by the SCS instrution, we'll need a temporary. 800 */ 801 if (scs_mask != unsigned(dst.writemask)) { 802 tmp = get_temp(glsl_type::vec4_type); 803 } 804 805 for (unsigned i = 0; i < 4; i++) { 806 unsigned this_mask = (1U << i); 807 st_src_reg src0 = src; 808 809 if ((done_mask & this_mask) != 0) 810 continue; 811 812 /* The source swizzle specified which component of the source generates 813 * sine / cosine for the current component in the destination. The SCS 814 * instruction requires that this value be swizzle to the X component. 815 * Replace the current swizzle with a swizzle that puts the source in 816 * the X component. 817 */ 818 unsigned src0_swiz = GET_SWZ(src.swizzle, i); 819 820 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 821 src0_swiz, src0_swiz); 822 for (unsigned j = i + 1; j < 4; j++) { 823 /* If there is another enabled component in the destination that is 824 * derived from the same inputs, generate its value on this pass as 825 * well. 826 */ 827 if (!(done_mask & (1 << j)) && 828 GET_SWZ(src0.swizzle, j) == src0_swiz) { 829 this_mask |= (1 << j); 830 } 831 } 832 833 if (this_mask != scs_mask) { 834 glsl_to_tgsi_instruction *inst; 835 st_dst_reg tmp_dst = st_dst_reg(tmp); 836 837 /* Emit the SCS instruction. 
838 */ 839 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); 840 inst->dst.writemask = scs_mask; 841 842 /* Move the result of the SCS instruction to the desired location in 843 * the destination. 844 */ 845 tmp.swizzle = MAKE_SWIZZLE4(component, component, 846 component, component); 847 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); 848 inst->dst.writemask = this_mask; 849 } else { 850 /* Emit the SCS instruction to write directly to the destination. 851 */ 852 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); 853 inst->dst.writemask = scs_mask; 854 } 855 856 done_mask |= this_mask; 857 } 858} 859 860int 861glsl_to_tgsi_visitor::add_constant(gl_register_file file, 862 gl_constant_value values[4], int size, int datatype, 863 GLuint *swizzle_out) 864{ 865 if (file == PROGRAM_CONSTANT) { 866 return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, 867 size, datatype, swizzle_out); 868 } else { 869 int index = 0; 870 immediate_storage *entry; 871 assert(file == PROGRAM_IMMEDIATE); 872 873 /* Search immediate storage to see if we already have an identical 874 * immediate that we can use instead of adding a duplicate entry. 875 */ 876 foreach_iter(exec_list_iterator, iter, this->immediates) { 877 entry = (immediate_storage *)iter.get(); 878 879 if (entry->size == size && 880 entry->type == datatype && 881 !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { 882 return index; 883 } 884 index++; 885 } 886 887 /* Add this immediate to the list. 
       */
      entry = new(mem_ctx) immediate_storage(values, size, datatype);
      this->immediates.push_tail(entry);
      this->num_immediates++;
      return index;
   }
}

/** Build an immediate source register holding the single float \p val. */
st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
   union gl_constant_value uval;

   uval.f = val;
   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);

   return src;
}

/** Build an immediate source register holding the single int \p val
 * (only valid when the driver supports native integers). */
st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
   union gl_constant_value uval;

   assert(native_integers);

   uval.i = val;
   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);

   return src;
}

/** Build an immediate of GLSL base type \p type; without native-integer
 * support everything is stored as float. */
st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
{
   if (native_integers)
      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
                                       st_src_reg_for_int(val);
   else
      return st_src_reg_for_float(val);
}

/**
 * Number of vec4 register slots a value of \p type occupies.
 */
static int
type_size(const struct glsl_type *type)
{
   unsigned int i;
   int size;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (type->is_matrix()) {
         return type->matrix_columns;
      } else {
         /* Regardless of size of vector, it gets a vec4.  This is bad
          * packing for things like floats, but otherwise arrays become a
          * mess.  Hopefully a later pass over the code can pack scalars
          * down if appropriate.
          */
         return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
       */
      return 1;
   default:
      assert(0);
      return 0;
   }
}

/**
 * In the initial pass of codegen, we assign temporary numbers to
 * intermediate results.  (not SSA -- variable assignments will reuse
 * storage).
 */
st_src_reg
glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
{
   st_src_reg src;

   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
   src.file = PROGRAM_TEMPORARY;
   src.index = next_temp;
   src.reladdr = NULL;
   next_temp += type_size(type);

   if (type->is_array() || type->is_record()) {
      src.swizzle = SWIZZLE_NOOP;
   } else {
      src.swizzle = swizzle_for_size(type->vector_elements);
   }
   src.negate = 0;

   return src;
}

/**
 * Linear scan of this->variables for the storage assigned to \p var;
 * returns NULL if none was assigned yet.
 */
variable_storage *
glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
{

   variable_storage *entry;

   foreach_iter(exec_list_iterator, iter, this->variables) {
      entry = (variable_storage *)iter.get();

      if (entry->var == var)
         return entry;
   }

   return NULL;
}

/**
 * Handle variable declarations: record gl_FragCoord conventions and load
 * built-in ("gl_*") uniforms from the STATE file, copying them into a
 * temporary when their state-var layout doesn't match the GLSL layout.
 */
void
glsl_to_tgsi_visitor::visit(ir_variable *ir)
{
   if (strcmp(ir->name, "gl_FragCoord") == 0) {
      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;

      fp->OriginUpperLeft = ir->origin_upper_left;
      fp->PixelCenterInteger = ir->pixel_center_integer;
   }

   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
      unsigned int i;
      const ir_state_slot *const slots = ir->state_slots;
      assert(ir->state_slots != NULL);

      /* Check if this statevar's setup in the STATE file exactly
       * matches how we'll want to reference it as a
       * struct/array/whatever.  If not, then we need to move it into
       * temporary storage and hope that it'll get copy-propagated
       * out.
       */
      for (i = 0; i < ir->num_state_slots; i++) {
         if (slots[i].swizzle != SWIZZLE_XYZW) {
            break;
         }
      }

      variable_storage *storage;
      st_dst_reg dst;
      if (i == ir->num_state_slots) {
         /* We'll set the index later. */
         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
         this->variables.push_tail(storage);

         dst = undef_dst;
      } else {
         /* The variable_storage constructor allocates slots based on the size
          * of the type.  However, this had better match the number of state
          * elements that we're going to copy into the new temporary.
          */
         assert((int) ir->num_state_slots == type_size(ir->type));

         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
                                                 this->next_temp);
         this->variables.push_tail(storage);
         this->next_temp += type_size(ir->type);

         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
               native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
      }


      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
         int index = _mesa_add_state_reference(this->prog->Parameters,
                                               (gl_state_index *)slots[i].tokens);

         if (storage->file == PROGRAM_STATE_VAR) {
            if (storage->index == -1) {
               storage->index = index;
            } else {
               assert(index == storage->index + (int)i);
            }
         } else {
            st_src_reg src(PROGRAM_STATE_VAR, index,
                  native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
            src.swizzle = slots[i].swizzle;
            emit(ir, TGSI_OPCODE_MOV, dst, src);
            /* even a float takes up a whole vec4 reg in a struct/array. */
            dst.index++;
         }
      }

      if (storage->file == PROGRAM_TEMPORARY &&
          dst.index != storage->index + (int) ir->num_state_slots) {
         fail_link(this->shader_program,
                   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
                   ir->name, dst.index - storage->index,
                   type_size(ir->type));
      }
   }
}

/**
 * Translate a loop: emit the optional counter initialization, BGNLOOP,
 * the to/cmp exit test as an if+break, the body, the optional increment,
 * and ENDLOOP.
 */
void
glsl_to_tgsi_visitor::visit(ir_loop *ir)
{
   ir_dereference_variable *counter = NULL;

   if (ir->counter != NULL)
      counter = new(ir) ir_dereference_variable(ir->counter);

   if (ir->from != NULL) {
      assert(ir->counter != NULL);

      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);

      a->accept(this);
      delete a;
   }

   emit(NULL, TGSI_OPCODE_BGNLOOP);

   if (ir->to) {
      ir_expression *e =
         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
                               counter, ir->to);
      ir_if *if_stmt =  new(ir) ir_if(e);

      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);

      if_stmt->then_instructions.push_tail(brk);

      if_stmt->accept(this);

      delete if_stmt;
      delete e;
      delete brk;
   }

   visit_exec_list(&ir->body_instructions, this);

   if (ir->increment) {
      ir_expression *e =
         new(ir) ir_expression(ir_binop_add, counter->type,
                               counter, ir->increment);

      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);

      a->accept(this);
      delete a;
      delete e;
   }

   emit(NULL, TGSI_OPCODE_ENDLOOP);
}

/** break/continue map directly to the TGSI BRK/CONT opcodes. */
void
glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(NULL, TGSI_OPCODE_BRK);
      break;
   case ir_loop_jump::jump_continue:
      emit(NULL, TGSI_OPCODE_CONT);
      break;
   }
}


/* Signatures are only reached through visit(ir_function) / ir_call
 * handling, never visited directly. */
void
glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
{
   assert(0);
   (void)ir;
}

void
glsl_to_tgsi_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to glsl_to_tgsi.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();

         ir->accept(this);
      }
   }
}

/**
 * Try to fold add(mul(a, b), c) — with the mul in operand \p mul_operand —
 * into a single MAD.  Returns false (emitting nothing) if the operand is
 * not a multiply.
 */
bool
glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
{
   int nonmul_operand = 1 - mul_operand;
   st_src_reg a, b, c;
   st_dst_reg result_dst;

   ir_expression *expr = ir->operands[mul_operand]->as_expression();
   if (!expr || expr->operation != ir_binop_mul)
      return false;

   expr->operands[0]->accept(this);
   a = this->result;
   expr->operands[1]->accept(this);
   b = this->result;
   ir->operands[nonmul_operand]->accept(this);
   c = this->result;

   this->result = get_temp(ir->type);
   result_dst = st_dst_reg(this->result);
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);

   return true;
}

/**
 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
 *
 * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
 * implemented using multiplication, and logical-or is implemented using
 * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
 * As result, the logical expression (a & !b) can be rewritten as:
 *
 *     - a * !b
 *     - a * (1 - b)
 *     - (a * 1) - (a * b)
 *     - a + -(a * b)
 *     - a + (a * -b)
 *
 * This final expression can be implemented as a single MAD(a, -b, a)
 * instruction.
 */
bool
glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
{
   const int other_operand = 1 - try_operand;
   st_src_reg a, b;

   /* Only fires when the chosen operand is a logical-not. */
   ir_expression *expr = ir->operands[try_operand]->as_expression();
   if (!expr || expr->operation != ir_unop_logic_not)
      return false;

   ir->operands[other_operand]->accept(this);
   a = this->result;
   expr->operands[0]->accept(this);
   b = this->result;

   /* negate is a per-channel bitmask; flipping all bits negates b. */
   b.negate = ~b.negate;

   this->result = get_temp(ir->type);
   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);

   return true;
}

/**
 * Try to fold a saturate() around an expression into the instruction
 * that computes it, using the per-instruction saturate flag.
 *
 * \return true if the saturate was handled (this->result holds the
 *         saturated value), false if the caller must emit it normally.
 */
bool
glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
{
   /* Saturates were only introduced to vertex programs in
    * NV_vertex_program3, so don't give them to drivers in the VP.
    */
   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
      return false;

   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   st_src_reg src = this->result;

   /* If we generated an expression instruction into a temporary in
    * processing the saturate's operand, apply the saturate to that
    * instruction.  Otherwise, generate a MOV to do the saturate.
    *
    * Note that we have to be careful to only do this optimization if
    * the instruction in question was what generated src->result.  For
    * example, ir_dereference_array might generate a MUL instruction
    * to create the reladdr, and return us a src reg using that
    * reladdr.  That MUL result is not the value we're trying to
    * saturate.
    */
   ir_expression *sat_src_expr = sat_src->as_expression();
   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
                        sat_src_expr->operation == ir_binop_add ||
                        sat_src_expr->operation == ir_binop_dot)) {
      /* The tail of the instruction list is the op that produced src. */
      glsl_to_tgsi_instruction *new_inst;
      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
      new_inst->saturate = true;
   } else {
      this->result = get_temp(ir->type);
      st_dst_reg result_dst = st_dst_reg(this->result);
      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
      glsl_to_tgsi_instruction *inst;
      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
      inst->saturate = true;
   }

   return true;
}

/**
 * Resolve a source register's relative-address component.
 *
 * Emits an ARL to load the address register from *reg->reladdr.  When
 * more than one operand of the same instruction needs reladdr
 * (*num_reladdr > 1), all but the last are copied through a temporary
 * since there is only a single address register to share.
 */
void
glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
                                      st_src_reg *reg, int *num_reladdr)
{
   if (!reg->reladdr)
      return;

   emit_arl(ir, address_reg, *reg->reladdr);

   if (*num_reladdr != 1) {
      st_src_reg temp = get_temp(glsl_type::vec4_type);

      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
      *reg = temp;
   }

   (*num_reladdr)--;
}

/**
 * Main expression translator: evaluates all operands into registers,
 * then emits the TGSI opcode(s) for the IR operation.  The computed
 * value is left in this->result.
 */
void
glsl_to_tgsi_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   st_src_reg op[Elements(ir->operands)];
   st_src_reg result_src;
   st_dst_reg result_dst;

   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
    */
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 1))
         return;
      if (try_emit_mad(ir, 0))
         return;
   }

   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
    */
   if (ir->operation == ir_binop_logic_and) {
      if (try_emit_mad_for_and_not(ir, 1))
         return;
      if (try_emit_mad_for_and_not(ir, 0))
         return;
   }

   if (try_emit_sat(ir))
      return;

   if (ir->operation == ir_quadop_vector)
      assert(!"ir_quadop_vector should have been lowered");

   for (operand = 0; operand <
        ir->get_num_operands(); operand++) {
      this->result.file = PROGRAM_UNDEFINED;
      ir->operands[operand]->accept(this);
      /* An operand that produced nothing is a translator bug; dump the
       * offending IR subtree and bail out rather than emit garbage.
       */
      if (this->result.file == PROGRAM_UNDEFINED) {
         ir_print_visitor v;
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   /* Widest operand width; used by the dot-product based comparisons. */
   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = PROGRAM_UNDEFINED;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(ir->type);
   /* convenience for the emit functions below. */
   result_dst = st_dst_reg(result_src);
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      if (result_dst.type != GLSL_TYPE_FLOAT)
         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
      else {
         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
          * older GPUs implement SEQ using multiple instructions (i915 uses two
          * SGE instructions and a MUL instruction).  Since our logic values are
          * 0.0 and 1.0, 1-x also implements !x.
          */
         op[0].negate = ~op[0].negate;
         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
      }
      break;
   case ir_unop_neg:
      if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
      else {
         /* Float negate is free: just flip the source negate bits. */
         op[0].negate = ~op[0].negate;
         result_src = op[0];
      }
      break;
   case ir_unop_abs:
      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
      break;
   case ir_unop_sign:
      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
      break;
   case ir_unop_rcp:
      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_log2:
      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
      break;
   case ir_unop_sin:
      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
      break;
   case ir_unop_sin_reduced:
      emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos_reduced:
      emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
      break;
   case ir_unop_dFdy:
   {
      /* The X component contains 1 or -1 depending on whether the framebuffer
       * is a FBO or the window system buffer, respectively.
       * It is then multiplied with the source operand of DDY.
       */
      static const gl_state_index transform_y_state[STATE_LENGTH]
         = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };

      unsigned transform_y_index =
         _mesa_add_state_reference(this->prog->Parameters,
                                   transform_y_state);

      st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
                                          transform_y_index,
                                          glsl_type::vec4_type);
      transform_y.swizzle = SWIZZLE_XXXX;

      st_src_reg temp = get_temp(glsl_type::vec4_type);

      emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
      emit(ir, TGSI_OPCODE_DDY, result_dst, temp);
      break;
   }

   case ir_unop_noise: {
      /* At some point, a motivated person could add a better
       * implementation of noise.  Currently not even the nvidia
       * binary drivers do anything more than this.  In any case, the
       * place to do this is in the GL state tracker, not the poor
       * driver.
       */
      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
      break;
   }

   case ir_binop_add:
      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
      break;

   case ir_binop_mul:
      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      if (result_dst.type == GLSL_TYPE_FLOAT)
         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      else
         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
      break;
   case ir_binop_mod:
      if (result_dst.type == GLSL_TYPE_FLOAT)
         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      else
         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
      break;

   case ir_binop_less:
      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
      break;
   case ir_binop_greater:
      /* a > b  ==  b < a: swap operands instead of needing an SGT. */
      emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
      break;
   case ir_binop_lequal:
      /* a <= b  ==  b >= a: swap operands and use SGE. */
      emit(ir, TGSI_OPCODE_SGE,
           result_dst, op[1], op[0]);
      break;
   case ir_binop_gequal:
      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
      break;
   case ir_binop_equal:
      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
      break;
   case ir_binop_nequal:
      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
      break;
   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         st_src_reg temp = get_temp(native_integers ?
            glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
            glsl_type::vec4_type);

         if (native_integers) {
            st_dst_reg temp_dst = st_dst_reg(temp);
            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);

            /* Per-channel equality into temp (integer booleans: ~0 / 0). */
            emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);

            /* Emit 1-3 AND operations to combine the SEQ results. */
            switch (ir->operands[0]->type->vector_elements) {
            case 2:
               /* x AND y happens in the final AND below. */
               break;
            case 3:
               /* temp.y = temp.y AND temp.z, then final AND of x and y. */
               temp_dst.writemask = WRITEMASK_Y;
               temp1.swizzle = SWIZZLE_YYYY;
               temp2.swizzle = SWIZZLE_ZZZZ;
               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
               break;
            case 4:
               /* Pairwise reduce: x = x AND y, y = z AND w. */
               temp_dst.writemask = WRITEMASK_X;
               temp1.swizzle = SWIZZLE_XXXX;
               temp2.swizzle = SWIZZLE_YYYY;
               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
               temp_dst.writemask = WRITEMASK_Y;
               temp1.swizzle = SWIZZLE_ZZZZ;
               temp2.swizzle = SWIZZLE_WWWW;
               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
            }

            /* Final reduction: result = temp.x AND temp.y. */
            temp1.swizzle = SWIZZLE_XXXX;
            temp2.swizzle = SWIZZLE_YYYY;
            emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
         } else {
            /* Float booleans: per-channel inequality, then reduce with a
             * dot-product (counts mismatching channels).
             */
            emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);

            /* After the dot-product, the value will be an integer on the
             * range [0,4].  Zero becomes 1.0, and positive values become zero.
             */
            emit_dp(ir, result_dst, temp, temp, vector_elements);

            /* Negating the result of the dot-product gives values on the range
             * [-4, 0].  Zero becomes 1.0, and negative values become zero.
             * This is achieved using SGE.
             */
            st_src_reg sge_src = result_src;
            sge_src.negate = ~sge_src.negate;
            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
         }
      } else {
         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         st_src_reg temp = get_temp(native_integers ?
            glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
            glsl_type::vec4_type);
         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);

         if (native_integers) {
            st_dst_reg temp_dst = st_dst_reg(temp);
            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);

            /* Emit 1-3 OR operations to combine the SNE results. */
            switch (ir->operands[0]->type->vector_elements) {
            case 2:
               /* x OR y happens in the final OR below. */
               break;
            case 3:
               temp_dst.writemask = WRITEMASK_Y;
               temp1.swizzle = SWIZZLE_YYYY;
               temp2.swizzle = SWIZZLE_ZZZZ;
               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
               break;
            case 4:
               /* Pairwise reduce: x = x OR y, y = z OR w. */
               temp_dst.writemask = WRITEMASK_X;
               temp1.swizzle = SWIZZLE_XXXX;
               temp2.swizzle = SWIZZLE_YYYY;
               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
               temp_dst.writemask = WRITEMASK_Y;
               temp1.swizzle = SWIZZLE_ZZZZ;
               temp2.swizzle = SWIZZLE_WWWW;
               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
            }

            /* Final reduction: result = temp.x OR temp.y. */
            temp1.swizzle = SWIZZLE_XXXX;
            temp2.swizzle = SWIZZLE_YYYY;
            emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
         } else {
            /* After the dot-product, the value will be an integer on the
             * range [0,4].
             * Zero stays zero, and positive values become 1.0.
             */
            glsl_to_tgsi_instruction *const dp =
               emit_dp(ir, result_dst, temp, temp, vector_elements);
            if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
               /* The clamping to [0,1] can be done for free in the fragment
                * shader with a saturate.
                */
               dp->saturate = true;
            } else {
               /* Negating the result of the dot-product gives values on the range
                * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
                * achieved using SLT.
                */
               st_src_reg slt_src = result_src;
               slt_src.negate = ~slt_src.negate;
               emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
            }
         }
      } else {
         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
      }
      break;

   case ir_unop_any: {
      assert(ir->operands[0]->type->is_vector());

      /* After the dot-product, the value will be an integer on the
       * range [0,4].  Zero stays zero, and positive values become 1.0.
       */
      glsl_to_tgsi_instruction *const dp =
         emit_dp(ir, result_dst, op[0], op[0],
                 ir->operands[0]->type->vector_elements);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
          result_dst.type == GLSL_TYPE_FLOAT) {
         /* The clamping to [0,1] can be done for free in the fragment
          * shader with a saturate.
          */
         dp->saturate = true;
      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
         /* Negating the result of the dot-product gives values on the range
          * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
          * is achieved using SLT.
          */
         st_src_reg slt_src = result_src;
         slt_src.negate = ~slt_src.negate;
         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
      }
      else {
         /* Use SNE 0 if integers are being used as boolean values.
          */
         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
      }
      break;
   }

   case ir_binop_logic_xor:
      /* Works for both boolean encodings: XOR on integers, SNE on the
       * 0.0/1.0 float representation.
       */
      if (native_integers)
         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
      else
         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or: {
      if (native_integers) {
         /* If integers are used as booleans, we can use an actual "or"
          * instruction.
          */
         assert(native_integers); /* NOTE(review): redundant inside this branch */
         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
      } else {
         /* After the addition, the value will be an integer on the
          * range [0,2].  Zero stays zero, and positive values become 1.0.
          */
         glsl_to_tgsi_instruction *add =
            emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
            /* The clamping to [0,1] can be done for free in the fragment
             * shader with a saturate if floats are being used as boolean values.
             */
            add->saturate = true;
         } else {
            /* Negating the result of the addition gives values on the range
             * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
             * is achieved using SLT.
             */
            st_src_reg slt_src = result_src;
            slt_src.negate = ~slt_src.negate;
            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
         }
      }
      break;
   }

   case ir_binop_logic_and:
      /* If native integers are disabled, the bool args are stored as float 0.0
       * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
       * actual AND opcode.
       */
      if (native_integers)
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
      else
         emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(ir, result_dst, op[0], op[1],
              ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      /* sqrt(x) = x * rsq(x). */
      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
      emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
      /* For incoming channels <= 0, set the result to 0. */
      op[0].negate = ~op[0].negate;
      emit(ir, TGSI_OPCODE_CMP, result_dst,
           op[0], result_src, st_src_reg_for_float(0.0));
      break;
   case ir_unop_rsq:
      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
         break;
      }
      /* fallthrough to next case otherwise */
   case ir_unop_b2f:
      if (native_integers) {
         /* Boolean true is ~0; AND with 1.0's bit pattern yields 1.0f. */
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
         break;
      }
      /* fallthrough to next case otherwise */
   case ir_unop_i2u:
   case ir_unop_u2i:
      /* Converting between signed and unsigned integers is a no-op. */
      result_src = op[0];
      break;
   case ir_unop_b2i:
      if (native_integers) {
         /* Booleans are stored as integers using ~0 for true and 0 for false.
          * GLSL requires that int(bool) return 1 for true and 0 for false.
          * This conversion is done with AND, but it could be done with NEG.
          */
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
      } else {
         /* Booleans and integers are both stored as floats when native
          * integers are disabled.
          */
         result_src = op[0];
      }
      break;
   case ir_unop_f2i:
      if (native_integers)
         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
      else
         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_f2u:
      if (native_integers)
         emit(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
      else
         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_u2f:
      /* Bitcasts only reinterpret the register contents; nothing to emit. */
      result_src = op[0];
      break;
   case ir_unop_f2b:
      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
      break;
   case ir_unop_i2b:
      if (native_integers)
         /* NOTE(review): INEG here presumably produces the all-ones/zero
          * integer boolean from a 0/1 input -- confirm against the TGSI
          * INEG definition.
          */
         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
      else
         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
      break;
   case ir_unop_trunc:
      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
      break;
   case ir_unop_floor:
      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
      break;
   case ir_unop_fract:
      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
      break;

   case ir_binop_min:
      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
      break;
   case ir_binop_max:
      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
      break;
   case ir_binop_pow:
      emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   /* The following GLSL 1.30 cases deliberately fall through when
    * !native_integers so that execution reaches the "unsupported"
    * assert at the end of the chain.
    */
   case ir_unop_bit_not:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
         break;
      }
   case ir_unop_u2f:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
         break;
      }
   case ir_binop_lshift:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
         break;
      }
   case ir_binop_rshift:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
         break;
      }
   case ir_binop_bit_and:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
         break;
      }
   case ir_binop_bit_xor:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
         break;
      }
   case ir_binop_bit_or:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
         break;
      }

      /* Reached by fallthrough from any of the cases above when
       * !native_integers: integer/bit ops need GLSL 1.30 support.
       */
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_binop_ubo_load:
      assert(!"not yet supported");
      break;

   case ir_quadop_vector:
      /* This operation should have already been handled.
       */
      assert(!"Should not get here.");
      break;
   }

   this->result = result_src;
}

/**
 * Apply a swizzle appearing in an expression by composing it with the
 * swizzle already present on the source register.
 */
void
glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
{
   st_src_reg src;
   int i;
   int swizzle[4];

   /* Note that this is only swizzles in expressions, not those on the left
    * hand side of an assignment, which do write masking.  See ir_assignment
    * for that.
    */

   ir->val->accept(this);
   src = this->result;
   assert(src.file != PROGRAM_UNDEFINED);

   for (i = 0; i < 4; i++) {
      if (i < ir->type->vector_elements) {
         /* Compose: look the requested component up through src's
          * existing swizzle.
          */
         switch (i) {
         case 0:
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
            break;
         case 1:
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
            break;
         case 2:
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
            break;
         case 3:
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
            break;
         }
      } else {
         /* If the type is smaller than a vec4, replicate the last
          * channel out.
          */
         swizzle[i] = swizzle[ir->type->vector_elements - 1];
      }
   }

   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}

/**
 * Resolve a variable reference to a register, creating the backing
 * storage on first use according to the variable's mode.
 */
void
glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
{
   variable_storage *entry = find_variable_storage(ir->var);
   ir_variable *var = ir->var;

   if (!entry) {
      switch (var->mode) {
      case ir_var_uniform:
         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
                                               var->location);
         this->variables.push_tail(entry);
         break;
      case ir_var_in:
      case ir_var_inout:
         /* The linker assigns locations for varyings and attributes,
          * including deprecated builtins (like gl_Color), user-assign
          * generic attributes (glBindVertexLocation), and
          * user-defined varyings.
          *
          * FINISHME: We would hit this path for function arguments.  Fix!
          */
         assert(var->location != -1);
         entry = new(mem_ctx) variable_storage(var,
                                               PROGRAM_INPUT,
                                               var->location);
         break;
      case ir_var_out:
         assert(var->location != -1);
         entry = new(mem_ctx) variable_storage(var,
                                               PROGRAM_OUTPUT,
                                               var->location + var->index);
         break;
      case ir_var_system_value:
         entry = new(mem_ctx) variable_storage(var,
                                               PROGRAM_SYSTEM_VALUE,
                                               var->location);
         break;
      case ir_var_auto:
      case ir_var_temporary:
         /* Locals get freshly allocated temporaries sized by their type. */
         entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
                                               this->next_temp);
         this->variables.push_tail(entry);

         next_temp += type_size(var->type);
         break;
      }

      if (!entry) {
         printf("Failed to make storage for %s\n", var->name);
         exit(1);
      }
   }

   this->result = st_src_reg(entry->file, entry->index, var->type);
   /* Without native integer support everything lives in float registers. */
   if (!native_integers)
      this->result.type = GLSL_TYPE_FLOAT;
}

/**
 * Array dereference: constant indices fold into the register index,
 * variable indices become a relative-address (reladdr) source.
 */
void
glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   st_src_reg src;
   int element_size = type_size(ir->type);

   index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (index) {
      /* Constant index: fold it straight into the register offset. */
      src.index += index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the TGSI register
       * index.
       */
      ir->array_index->accept(this);

      st_src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         /* Scale the index by the element size (in vec4 registers). */
         index_reg = get_temp(native_integers ?
                              glsl_type::int_type : glsl_type::float_type);

         emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
              this->result, st_src_reg_for_type(index_reg.type, element_size));
      }

      /* If there was already a relative address register involved, add the
       * new and the old together to get the new offset.
       */
      if (src.reladdr != NULL) {
         st_src_reg accum_reg = get_temp(native_integers ?
                                         glsl_type::int_type : glsl_type::float_type);

         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
              index_reg, *src.reladdr);

         index_reg = accum_reg;
      }

      /* reladdr is heap-allocated (ralloc'd on mem_ctx) because src is
       * copied around by value.
       */
      src.reladdr = ralloc(mem_ctx, st_src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out.
    */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = SWIZZLE_NOOP;

   this->result = src;
}

/**
 * Record (struct) member access: add the accumulated vec4 offset of the
 * preceding fields to the base register of the record.
 */
void
glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   /* Sum the sizes of all fields that precede the one being accessed. */
   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = SWIZZLE_NOOP;

   this->result.index += offset;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static st_dst_reg
get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series conditional moves
    * before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return st_dst_reg(v->result);
}

/**
 * Process the condition of a conditional assignment
 *
 * Examines the condition of a conditional assignment to generate the optimal
 * first operand of a \c CMP instruction.  If the condition is a relational
 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
 * used as the source for the \c CMP instruction.  Otherwise the comparison
 * is processed to a boolean result, and the boolean result is used as the
 * operand to the CMP instruction.
 */
bool
glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
{
   ir_rvalue *src_ir = ir;
   bool negate = true;
   bool switch_order = false;

   ir_expression *const expr = ir->as_expression();
   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
      bool zero_on_left = false;

      /* Find which side of the comparison is the literal zero, if any. */
      if (expr->operands[0]->is_zero()) {
         src_ir = expr->operands[1];
         zero_on_left = true;
      } else if (expr->operands[1]->is_zero()) {
         src_ir = expr->operands[0];
         zero_on_left = false;
      }

      /*       a is  -  0  +              -  0  +
       * (a <  0)    T  F  F   ( a < 0)   T  F  F
       * (0 <  a)    F  F  T   (-a < 0)   F  F  T
       * (a <= 0)    T  T  F   (-a < 0)   F  F  T   (swap order of other operands)
       * (0 <= a)    F  T  T   ( a < 0)   T  F  F   (swap order of other operands)
       * (a >  0)    F  F  T   (-a < 0)   F  F  T
       * (0 >  a)    T  F  F   ( a < 0)   T  F  F
       * (a >= 0)    F  T  T   ( a < 0)   T  F  F   (swap order of other operands)
       * (0 >= a)    T  T  F   (-a < 0)   F  F  T   (swap order of other operands)
       *
       * Note that exchanging the order of 0 and 'a' in the comparison simply
       * means that the value of 'a' should be negated.
       */
      if (src_ir != ir) {
         switch (expr->operation) {
         case ir_binop_less:
            switch_order = false;
            negate = zero_on_left;
            break;

         case ir_binop_greater:
            switch_order = false;
            negate = !zero_on_left;
            break;

         case ir_binop_lequal:
            switch_order = true;
            negate = !zero_on_left;
            break;

         case ir_binop_gequal:
            switch_order = true;
            negate = zero_on_left;
            break;

         default:
            /* This isn't the right kind of comparison afterall, so make sure
             * the whole condition is visited.
             */
            src_ir = ir;
            break;
         }
      }
   }

   src_ir->accept(this);

   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
    * computing the condition.
    */
   if (negate)
      this->result.negate = ~this->result.negate;

   return switch_order;
}

/**
 * Translate an assignment: evaluate the RHS, compute the LHS storage and
 * writemask, then emit MOVs (or CMPs for conditional assignments).
 */
void
glsl_to_tgsi_visitor::visit(ir_assignment *ir)
{
   st_dst_reg l;
   st_src_reg r;
   int i;

   ir->rhs->accept(this);
   r = this->result;

   l = get_assignment_lhs(ir->lhs, this);

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).  This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      l.writemask = WRITEMASK_XYZW;
   } else if (ir->lhs->type->is_scalar() &&
              ir->lhs->variable_referenced()->mode == ir_var_out) {
      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
       * FINISHME: W component of fragment shader output zero, work correctly.
2207 */ 2208 l.writemask = WRITEMASK_XYZW; 2209 } else { 2210 int swizzles[4]; 2211 int first_enabled_chan = 0; 2212 int rhs_chan = 0; 2213 2214 l.writemask = ir->write_mask; 2215 2216 for (int i = 0; i < 4; i++) { 2217 if (l.writemask & (1 << i)) { 2218 first_enabled_chan = GET_SWZ(r.swizzle, i); 2219 break; 2220 } 2221 } 2222 2223 /* Swizzle a small RHS vector into the channels being written. 2224 * 2225 * glsl ir treats write_mask as dictating how many channels are 2226 * present on the RHS while TGSI treats write_mask as just 2227 * showing which channels of the vec4 RHS get written. 2228 */ 2229 for (int i = 0; i < 4; i++) { 2230 if (l.writemask & (1 << i)) 2231 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 2232 else 2233 swizzles[i] = first_enabled_chan; 2234 } 2235 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 2236 swizzles[2], swizzles[3]); 2237 } 2238 2239 assert(l.file != PROGRAM_UNDEFINED); 2240 assert(r.file != PROGRAM_UNDEFINED); 2241 2242 if (ir->condition) { 2243 const bool switch_order = this->process_move_condition(ir->condition); 2244 st_src_reg condition = this->result; 2245 2246 for (i = 0; i < type_size(ir->lhs->type); i++) { 2247 st_src_reg l_src = st_src_reg(l); 2248 st_src_reg condition_temp = condition; 2249 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); 2250 2251 if (native_integers) { 2252 /* This is necessary because TGSI's CMP instruction expects the 2253 * condition to be a float, and we store booleans as integers. 2254 * If TGSI had a UCMP instruction or similar, this extra 2255 * instruction would not be necessary. 
2256 */ 2257 condition_temp = get_temp(glsl_type::vec4_type); 2258 condition.negate = 0; 2259 emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition); 2260 condition_temp.swizzle = condition.swizzle; 2261 } 2262 2263 if (switch_order) { 2264 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r); 2265 } else { 2266 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src); 2267 } 2268 2269 l.index++; 2270 r.index++; 2271 } 2272 } else if (ir->rhs->as_expression() && 2273 this->instructions.get_tail() && 2274 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 2275 type_size(ir->lhs->type) == 1 && 2276 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { 2277 /* To avoid emitting an extra MOV when assigning an expression to a 2278 * variable, emit the last instruction of the expression again, but 2279 * replace the destination register with the target of the assignment. 2280 * Dead code elimination will remove the original instruction. 2281 */ 2282 glsl_to_tgsi_instruction *inst, *new_inst; 2283 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2284 new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); 2285 new_inst->saturate = inst->saturate; 2286 inst->dead_mask = inst->dst.writemask; 2287 } else { 2288 for (i = 0; i < type_size(ir->lhs->type); i++) { 2289 emit(ir, TGSI_OPCODE_MOV, l, r); 2290 l.index++; 2291 r.index++; 2292 } 2293 } 2294} 2295 2296 2297void 2298glsl_to_tgsi_visitor::visit(ir_constant *ir) 2299{ 2300 st_src_reg src; 2301 GLfloat stack_vals[4] = { 0 }; 2302 gl_constant_value *values = (gl_constant_value *) stack_vals; 2303 GLenum gl_type = GL_NONE; 2304 unsigned int i; 2305 static int in_array = 0; 2306 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 2307 2308 /* Unfortunately, 4 floats is all we can get into 2309 * _mesa_add_typed_unnamed_constant. 
So, make a temp to store an 2310 * aggregate constant and move each constant value into it. If we 2311 * get lucky, copy propagation will eliminate the extra moves. 2312 */ 2313 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 2314 st_src_reg temp_base = get_temp(ir->type); 2315 st_dst_reg temp = st_dst_reg(temp_base); 2316 2317 foreach_iter(exec_list_iterator, iter, ir->components) { 2318 ir_constant *field_value = (ir_constant *)iter.get(); 2319 int size = type_size(field_value->type); 2320 2321 assert(size > 0); 2322 2323 field_value->accept(this); 2324 src = this->result; 2325 2326 for (i = 0; i < (unsigned int)size; i++) { 2327 emit(ir, TGSI_OPCODE_MOV, temp, src); 2328 2329 src.index++; 2330 temp.index++; 2331 } 2332 } 2333 this->result = temp_base; 2334 return; 2335 } 2336 2337 if (ir->type->is_array()) { 2338 st_src_reg temp_base = get_temp(ir->type); 2339 st_dst_reg temp = st_dst_reg(temp_base); 2340 int size = type_size(ir->type->fields.array); 2341 2342 assert(size > 0); 2343 in_array++; 2344 2345 for (i = 0; i < ir->type->length; i++) { 2346 ir->array_elements[i]->accept(this); 2347 src = this->result; 2348 for (int j = 0; j < size; j++) { 2349 emit(ir, TGSI_OPCODE_MOV, temp, src); 2350 2351 src.index++; 2352 temp.index++; 2353 } 2354 } 2355 this->result = temp_base; 2356 in_array--; 2357 return; 2358 } 2359 2360 if (ir->type->is_matrix()) { 2361 st_src_reg mat = get_temp(ir->type); 2362 st_dst_reg mat_column = st_dst_reg(mat); 2363 2364 for (i = 0; i < ir->type->matrix_columns; i++) { 2365 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 2366 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 2367 2368 src = st_src_reg(file, -1, ir->type->base_type); 2369 src.index = add_constant(file, 2370 values, 2371 ir->type->vector_elements, 2372 GL_FLOAT, 2373 &src.swizzle); 2374 emit(ir, TGSI_OPCODE_MOV, mat_column, src); 2375 2376 mat_column.index++; 2377 } 2378 2379 this->result = mat; 2380 return; 2381 } 2382 2383 switch 
(ir->type->base_type) { 2384 case GLSL_TYPE_FLOAT: 2385 gl_type = GL_FLOAT; 2386 for (i = 0; i < ir->type->vector_elements; i++) { 2387 values[i].f = ir->value.f[i]; 2388 } 2389 break; 2390 case GLSL_TYPE_UINT: 2391 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; 2392 for (i = 0; i < ir->type->vector_elements; i++) { 2393 if (native_integers) 2394 values[i].u = ir->value.u[i]; 2395 else 2396 values[i].f = ir->value.u[i]; 2397 } 2398 break; 2399 case GLSL_TYPE_INT: 2400 gl_type = native_integers ? GL_INT : GL_FLOAT; 2401 for (i = 0; i < ir->type->vector_elements; i++) { 2402 if (native_integers) 2403 values[i].i = ir->value.i[i]; 2404 else 2405 values[i].f = ir->value.i[i]; 2406 } 2407 break; 2408 case GLSL_TYPE_BOOL: 2409 gl_type = native_integers ? GL_BOOL : GL_FLOAT; 2410 for (i = 0; i < ir->type->vector_elements; i++) { 2411 if (native_integers) 2412 values[i].u = ir->value.b[i] ? ~0 : 0; 2413 else 2414 values[i].f = ir->value.b[i]; 2415 } 2416 break; 2417 default: 2418 assert(!"Non-float/uint/int/bool constant"); 2419 } 2420 2421 this->result = st_src_reg(file, -1, ir->type); 2422 this->result.index = add_constant(file, 2423 values, 2424 ir->type->vector_elements, 2425 gl_type, 2426 &this->result.swizzle); 2427} 2428 2429function_entry * 2430glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) 2431{ 2432 function_entry *entry; 2433 2434 foreach_iter(exec_list_iterator, iter, this->function_signatures) { 2435 entry = (function_entry *)iter.get(); 2436 2437 if (entry->sig == sig) 2438 return entry; 2439 } 2440 2441 entry = ralloc(mem_ctx, function_entry); 2442 entry->sig = sig; 2443 entry->sig_id = this->next_signature_id++; 2444 entry->bgn_inst = NULL; 2445 2446 /* Allocate storage for all the parameters. 
*/ 2447 foreach_iter(exec_list_iterator, iter, sig->parameters) { 2448 ir_variable *param = (ir_variable *)iter.get(); 2449 variable_storage *storage; 2450 2451 storage = find_variable_storage(param); 2452 assert(!storage); 2453 2454 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, 2455 this->next_temp); 2456 this->variables.push_tail(storage); 2457 2458 this->next_temp += type_size(param->type); 2459 } 2460 2461 if (!sig->return_type->is_void()) { 2462 entry->return_reg = get_temp(sig->return_type); 2463 } else { 2464 entry->return_reg = undef_src; 2465 } 2466 2467 this->function_signatures.push_tail(entry); 2468 return entry; 2469} 2470 2471void 2472glsl_to_tgsi_visitor::visit(ir_call *ir) 2473{ 2474 glsl_to_tgsi_instruction *call_inst; 2475 ir_function_signature *sig = ir->callee; 2476 function_entry *entry = get_function_signature(sig); 2477 int i; 2478 2479 /* Process in parameters. */ 2480 exec_list_iterator sig_iter = sig->parameters.iterator(); 2481 foreach_iter(exec_list_iterator, iter, *ir) { 2482 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2483 ir_variable *param = (ir_variable *)sig_iter.get(); 2484 2485 if (param->mode == ir_var_in || 2486 param->mode == ir_var_inout) { 2487 variable_storage *storage = find_variable_storage(param); 2488 assert(storage); 2489 2490 param_rval->accept(this); 2491 st_src_reg r = this->result; 2492 2493 st_dst_reg l; 2494 l.file = storage->file; 2495 l.index = storage->index; 2496 l.reladdr = NULL; 2497 l.writemask = WRITEMASK_XYZW; 2498 l.cond_mask = COND_TR; 2499 2500 for (i = 0; i < type_size(param->type); i++) { 2501 emit(ir, TGSI_OPCODE_MOV, l, r); 2502 l.index++; 2503 r.index++; 2504 } 2505 } 2506 2507 sig_iter.next(); 2508 } 2509 assert(!sig_iter.has_next()); 2510 2511 /* Emit call instruction */ 2512 call_inst = emit(ir, TGSI_OPCODE_CAL); 2513 call_inst->function = entry; 2514 2515 /* Process out parameters. 
*/ 2516 sig_iter = sig->parameters.iterator(); 2517 foreach_iter(exec_list_iterator, iter, *ir) { 2518 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2519 ir_variable *param = (ir_variable *)sig_iter.get(); 2520 2521 if (param->mode == ir_var_out || 2522 param->mode == ir_var_inout) { 2523 variable_storage *storage = find_variable_storage(param); 2524 assert(storage); 2525 2526 st_src_reg r; 2527 r.file = storage->file; 2528 r.index = storage->index; 2529 r.reladdr = NULL; 2530 r.swizzle = SWIZZLE_NOOP; 2531 r.negate = 0; 2532 2533 param_rval->accept(this); 2534 st_dst_reg l = st_dst_reg(this->result); 2535 2536 for (i = 0; i < type_size(param->type); i++) { 2537 emit(ir, TGSI_OPCODE_MOV, l, r); 2538 l.index++; 2539 r.index++; 2540 } 2541 } 2542 2543 sig_iter.next(); 2544 } 2545 assert(!sig_iter.has_next()); 2546 2547 /* Process return value. */ 2548 this->result = entry->return_reg; 2549} 2550 2551void 2552glsl_to_tgsi_visitor::visit(ir_texture *ir) 2553{ 2554 st_src_reg result_src, coord, lod_info, projector, dx, dy, offset; 2555 st_dst_reg result_dst, coord_dst; 2556 glsl_to_tgsi_instruction *inst = NULL; 2557 unsigned opcode = TGSI_OPCODE_NOP; 2558 2559 if (ir->coordinate) { 2560 ir->coordinate->accept(this); 2561 2562 /* Put our coords in a temp. We'll need to modify them for shadow, 2563 * projection, or LOD, so the only case we'd use it as is is if 2564 * we're doing plain old texturing. The optimization passes on 2565 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 2566 */ 2567 coord = get_temp(glsl_type::vec4_type); 2568 coord_dst = st_dst_reg(coord); 2569 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2570 } 2571 2572 if (ir->projector) { 2573 ir->projector->accept(this); 2574 projector = this->result; 2575 } 2576 2577 /* Storage for our result. Ideally for an assignment we'd be using 2578 * the actual storage for the result here, instead. 
2579 */ 2580 result_src = get_temp(glsl_type::vec4_type); 2581 result_dst = st_dst_reg(result_src); 2582 2583 switch (ir->op) { 2584 case ir_tex: 2585 opcode = TGSI_OPCODE_TEX; 2586 break; 2587 case ir_txb: 2588 opcode = TGSI_OPCODE_TXB; 2589 ir->lod_info.bias->accept(this); 2590 lod_info = this->result; 2591 break; 2592 case ir_txl: 2593 opcode = TGSI_OPCODE_TXL; 2594 ir->lod_info.lod->accept(this); 2595 lod_info = this->result; 2596 break; 2597 case ir_txd: 2598 opcode = TGSI_OPCODE_TXD; 2599 ir->lod_info.grad.dPdx->accept(this); 2600 dx = this->result; 2601 ir->lod_info.grad.dPdy->accept(this); 2602 dy = this->result; 2603 break; 2604 case ir_txs: 2605 opcode = TGSI_OPCODE_TXQ; 2606 ir->lod_info.lod->accept(this); 2607 lod_info = this->result; 2608 break; 2609 case ir_txf: 2610 opcode = TGSI_OPCODE_TXF; 2611 ir->lod_info.lod->accept(this); 2612 lod_info = this->result; 2613 if (ir->offset) { 2614 ir->offset->accept(this); 2615 offset = this->result; 2616 } 2617 break; 2618 } 2619 2620 const glsl_type *sampler_type = ir->sampler->type; 2621 2622 if (ir->projector) { 2623 if (opcode == TGSI_OPCODE_TEX) { 2624 /* Slot the projector in as the last component of the coord. */ 2625 coord_dst.writemask = WRITEMASK_W; 2626 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); 2627 coord_dst.writemask = WRITEMASK_XYZW; 2628 opcode = TGSI_OPCODE_TXP; 2629 } else { 2630 st_src_reg coord_w = coord; 2631 coord_w.swizzle = SWIZZLE_WWWW; 2632 2633 /* For the other TEX opcodes there's no projective version 2634 * since the last slot is taken up by LOD info. Do the 2635 * projective divide now. 2636 */ 2637 coord_dst.writemask = WRITEMASK_W; 2638 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); 2639 2640 /* In the case where we have to project the coordinates "by hand," 2641 * the shadow comparator value must also be projected. 
2642 */ 2643 st_src_reg tmp_src = coord; 2644 if (ir->shadow_comparitor) { 2645 /* Slot the shadow value in as the second to last component of the 2646 * coord. 2647 */ 2648 ir->shadow_comparitor->accept(this); 2649 2650 tmp_src = get_temp(glsl_type::vec4_type); 2651 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 2652 2653 /* Projective division not allowed for array samplers. */ 2654 assert(!sampler_type->sampler_array); 2655 2656 tmp_dst.writemask = WRITEMASK_Z; 2657 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 2658 2659 tmp_dst.writemask = WRITEMASK_XY; 2660 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 2661 } 2662 2663 coord_dst.writemask = WRITEMASK_XYZ; 2664 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 2665 2666 coord_dst.writemask = WRITEMASK_XYZW; 2667 coord.swizzle = SWIZZLE_XYZW; 2668 } 2669 } 2670 2671 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 2672 * comparator was put in the correct place (and projected) by the code, 2673 * above, that handles by-hand projection. 2674 */ 2675 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 2676 /* Slot the shadow value in as the second to last component of the 2677 * coord. 2678 */ 2679 ir->shadow_comparitor->accept(this); 2680 2681 /* XXX This will need to be updated for cubemap array samplers. */ 2682 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2683 sampler_type->sampler_array) || 2684 sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { 2685 coord_dst.writemask = WRITEMASK_W; 2686 } else { 2687 coord_dst.writemask = WRITEMASK_Z; 2688 } 2689 2690 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2691 coord_dst.writemask = WRITEMASK_XYZW; 2692 } 2693 2694 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || 2695 opcode == TGSI_OPCODE_TXF) { 2696 /* TGSI stores LOD or LOD bias in the last channel of the coords. 
*/ 2697 coord_dst.writemask = WRITEMASK_W; 2698 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 2699 coord_dst.writemask = WRITEMASK_XYZW; 2700 } 2701 2702 if (opcode == TGSI_OPCODE_TXD) 2703 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2704 else if (opcode == TGSI_OPCODE_TXQ) 2705 inst = emit(ir, opcode, result_dst, lod_info); 2706 else if (opcode == TGSI_OPCODE_TXF) { 2707 inst = emit(ir, opcode, result_dst, coord); 2708 } else 2709 inst = emit(ir, opcode, result_dst, coord); 2710 2711 if (ir->shadow_comparitor) 2712 inst->tex_shadow = GL_TRUE; 2713 2714 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2715 this->shader_program, 2716 this->prog); 2717 2718 if (ir->offset) { 2719 inst->tex_offset_num_offset = 1; 2720 inst->tex_offsets[0].Index = offset.index; 2721 inst->tex_offsets[0].File = offset.file; 2722 inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0); 2723 inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1); 2724 inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2); 2725 } 2726 2727 switch (sampler_type->sampler_dimensionality) { 2728 case GLSL_SAMPLER_DIM_1D: 2729 inst->tex_target = (sampler_type->sampler_array) 2730 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2731 break; 2732 case GLSL_SAMPLER_DIM_2D: 2733 inst->tex_target = (sampler_type->sampler_array) 2734 ? 
TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2735 break; 2736 case GLSL_SAMPLER_DIM_3D: 2737 inst->tex_target = TEXTURE_3D_INDEX; 2738 break; 2739 case GLSL_SAMPLER_DIM_CUBE: 2740 inst->tex_target = TEXTURE_CUBE_INDEX; 2741 break; 2742 case GLSL_SAMPLER_DIM_RECT: 2743 inst->tex_target = TEXTURE_RECT_INDEX; 2744 break; 2745 case GLSL_SAMPLER_DIM_BUF: 2746 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2747 break; 2748 case GLSL_SAMPLER_DIM_EXTERNAL: 2749 inst->tex_target = TEXTURE_EXTERNAL_INDEX; 2750 break; 2751 default: 2752 assert(!"Should not get here."); 2753 } 2754 2755 this->result = result_src; 2756} 2757 2758void 2759glsl_to_tgsi_visitor::visit(ir_return *ir) 2760{ 2761 if (ir->get_value()) { 2762 st_dst_reg l; 2763 int i; 2764 2765 assert(current_function); 2766 2767 ir->get_value()->accept(this); 2768 st_src_reg r = this->result; 2769 2770 l = st_dst_reg(current_function->return_reg); 2771 2772 for (i = 0; i < type_size(current_function->sig->return_type); i++) { 2773 emit(ir, TGSI_OPCODE_MOV, l, r); 2774 l.index++; 2775 r.index++; 2776 } 2777 } 2778 2779 emit(ir, TGSI_OPCODE_RET); 2780} 2781 2782void 2783glsl_to_tgsi_visitor::visit(ir_discard *ir) 2784{ 2785 if (ir->condition) { 2786 ir->condition->accept(this); 2787 this->result.negate = ~this->result.negate; 2788 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); 2789 } else { 2790 emit(ir, TGSI_OPCODE_KILP); 2791 } 2792} 2793 2794void 2795glsl_to_tgsi_visitor::visit(ir_if *ir) 2796{ 2797 glsl_to_tgsi_instruction *cond_inst, *if_inst; 2798 glsl_to_tgsi_instruction *prev_inst; 2799 2800 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2801 2802 ir->condition->accept(this); 2803 assert(this->result.file != PROGRAM_UNDEFINED); 2804 2805 if (this->options->EmitCondCodes) { 2806 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2807 2808 /* See if we actually generated any instruction for generating 2809 * the condition. 
If not, then cook up a move to a temp so we 2810 * have something to set cond_update on. 2811 */ 2812 if (cond_inst == prev_inst) { 2813 st_src_reg temp = get_temp(glsl_type::bool_type); 2814 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); 2815 } 2816 cond_inst->cond_update = GL_TRUE; 2817 2818 if_inst = emit(ir->condition, TGSI_OPCODE_IF); 2819 if_inst->dst.cond_mask = COND_NE; 2820 } else { 2821 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); 2822 } 2823 2824 this->instructions.push_tail(if_inst); 2825 2826 visit_exec_list(&ir->then_instructions, this); 2827 2828 if (!ir->else_instructions.is_empty()) { 2829 emit(ir->condition, TGSI_OPCODE_ELSE); 2830 visit_exec_list(&ir->else_instructions, this); 2831 } 2832 2833 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); 2834} 2835 2836glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 2837{ 2838 result.file = PROGRAM_UNDEFINED; 2839 next_temp = 1; 2840 next_signature_id = 1; 2841 num_immediates = 0; 2842 current_function = NULL; 2843 num_address_regs = 0; 2844 samplers_used = 0; 2845 indirect_addr_temps = false; 2846 indirect_addr_consts = false; 2847 glsl_version = 0; 2848 native_integers = false; 2849 mem_ctx = ralloc_context(NULL); 2850 ctx = NULL; 2851 prog = NULL; 2852 shader_program = NULL; 2853 options = NULL; 2854} 2855 2856glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 2857{ 2858 ralloc_free(mem_ctx); 2859} 2860 2861extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 2862{ 2863 delete v; 2864} 2865 2866 2867/** 2868 * Count resources used by the given gpu program (number of texture 2869 * samplers, etc). 
2870 */ 2871static void 2872count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2873{ 2874 v->samplers_used = 0; 2875 2876 foreach_iter(exec_list_iterator, iter, v->instructions) { 2877 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2878 2879 if (is_tex_instruction(inst->op)) { 2880 v->samplers_used |= 1 << inst->sampler; 2881 2882 if (inst->tex_shadow) { 2883 prog->ShadowSamplers |= 1 << inst->sampler; 2884 } 2885 } 2886 } 2887 2888 prog->SamplersUsed = v->samplers_used; 2889 2890 if (v->shader_program != NULL) 2891 _mesa_update_shader_textures_used(v->shader_program, prog); 2892} 2893 2894static void 2895set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 2896 struct gl_shader_program *shader_program, 2897 const char *name, const glsl_type *type, 2898 ir_constant *val) 2899{ 2900 if (type->is_record()) { 2901 ir_constant *field_constant; 2902 2903 field_constant = (ir_constant *)val->components.get_head(); 2904 2905 for (unsigned int i = 0; i < type->length; i++) { 2906 const glsl_type *field_type = type->fields.structure[i].type; 2907 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 2908 type->fields.structure[i].name); 2909 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 2910 field_type, field_constant); 2911 field_constant = (ir_constant *)field_constant->next; 2912 } 2913 return; 2914 } 2915 2916 unsigned offset; 2917 unsigned index = _mesa_get_uniform_location(ctx, shader_program, name, 2918 &offset); 2919 if (offset == GL_INVALID_INDEX) { 2920 fail_link(shader_program, 2921 "Couldn't find uniform for initializer %s\n", name); 2922 return; 2923 } 2924 int loc = _mesa_uniform_merge_location_offset(index, offset); 2925 2926 for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { 2927 ir_constant *element; 2928 const glsl_type *element_type; 2929 if (type->is_array()) { 2930 element = val->array_elements[i]; 2931 element_type = type->fields.array; 2932 } else { 2933 element = val; 2934 element_type = type; 2935 } 2936 2937 void *values; 2938 2939 if (element_type->base_type == GLSL_TYPE_BOOL) { 2940 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 2941 for (unsigned int j = 0; j < element_type->components(); j++) { 2942 conv[j] = element->value.b[j]; 2943 } 2944 values = (void *)conv; 2945 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 2946 element_type->vector_elements, 2947 1); 2948 } else { 2949 values = &element->value; 2950 } 2951 2952 if (element_type->is_matrix()) { 2953 _mesa_uniform_matrix(ctx, shader_program, 2954 element_type->matrix_columns, 2955 element_type->vector_elements, 2956 loc, 1, GL_FALSE, (GLfloat *)values); 2957 } else { 2958 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 2959 values, element_type->gl_type); 2960 } 2961 2962 loc++; 2963 } 2964} 2965 2966/** 2967 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 2968 * are read from the given src in this instruction 2969 */ 2970static int 2971get_src_arg_mask(st_dst_reg dst, st_src_reg src) 2972{ 2973 int read_mask = 0, comp; 2974 2975 /* Now, given the src swizzle and the written channels, find which 2976 * components are actually read 2977 */ 2978 for (comp = 0; comp < 4; ++comp) { 2979 const unsigned coord = GET_SWZ(src.swizzle, comp); 2980 ASSERT(coord < 4); 2981 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 2982 read_mask |= 1 << coord; 2983 } 2984 2985 return read_mask; 2986} 2987 2988/** 2989 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 2990 * instruction is the first instruction to write to register T0. There are 2991 * several lowering passes done in GLSL IR (e.g. 
branches and 2992 * relative addressing) that create a large number of conditional assignments 2993 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 2994 * 2995 * Here is why this conversion is safe: 2996 * CMP T0, T1 T2 T0 can be expanded to: 2997 * if (T1 < 0.0) 2998 * MOV T0, T2; 2999 * else 3000 * MOV T0, T0; 3001 * 3002 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 3003 * as the original program. If (T1 < 0.0) evaluates to false, executing 3004 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 3005 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 3006 * because any instruction that was going to read from T0 after this was going 3007 * to read a garbage value anyway. 3008 */ 3009void 3010glsl_to_tgsi_visitor::simplify_cmp(void) 3011{ 3012 unsigned *tempWrites; 3013 unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; 3014 3015 tempWrites = new unsigned[MAX_TEMPS]; 3016 if (!tempWrites) { 3017 return; 3018 } 3019 memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS); 3020 memset(outputWrites, 0, sizeof(outputWrites)); 3021 3022 foreach_iter(exec_list_iterator, iter, this->instructions) { 3023 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3024 unsigned prevWriteMask = 0; 3025 3026 /* Give up if we encounter relative addressing or flow control. 
*/ 3027 if (inst->dst.reladdr || 3028 tgsi_get_opcode_info(inst->op)->is_branch || 3029 inst->op == TGSI_OPCODE_BGNSUB || 3030 inst->op == TGSI_OPCODE_CONT || 3031 inst->op == TGSI_OPCODE_END || 3032 inst->op == TGSI_OPCODE_ENDSUB || 3033 inst->op == TGSI_OPCODE_RET) { 3034 break; 3035 } 3036 3037 if (inst->dst.file == PROGRAM_OUTPUT) { 3038 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); 3039 prevWriteMask = outputWrites[inst->dst.index]; 3040 outputWrites[inst->dst.index] |= inst->dst.writemask; 3041 } else if (inst->dst.file == PROGRAM_TEMPORARY) { 3042 assert(inst->dst.index < MAX_TEMPS); 3043 prevWriteMask = tempWrites[inst->dst.index]; 3044 tempWrites[inst->dst.index] |= inst->dst.writemask; 3045 } 3046 3047 /* For a CMP to be considered a conditional write, the destination 3048 * register and source register two must be the same. */ 3049 if (inst->op == TGSI_OPCODE_CMP 3050 && !(inst->dst.writemask & prevWriteMask) 3051 && inst->src[2].file == inst->dst.file 3052 && inst->src[2].index == inst->dst.index 3053 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { 3054 3055 inst->op = TGSI_OPCODE_MOV; 3056 inst->src[0] = inst->src[1]; 3057 } 3058 } 3059 3060 delete [] tempWrites; 3061} 3062 3063/* Replaces all references to a temporary register index with another index. 
*/ 3064void 3065glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) 3066{ 3067 foreach_iter(exec_list_iterator, iter, this->instructions) { 3068 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3069 unsigned j; 3070 3071 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3072 if (inst->src[j].file == PROGRAM_TEMPORARY && 3073 inst->src[j].index == index) { 3074 inst->src[j].index = new_index; 3075 } 3076 } 3077 3078 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3079 inst->dst.index = new_index; 3080 } 3081 } 3082} 3083 3084int 3085glsl_to_tgsi_visitor::get_first_temp_read(int index) 3086{ 3087 int depth = 0; /* loop depth */ 3088 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3089 unsigned i = 0, j; 3090 3091 foreach_iter(exec_list_iterator, iter, this->instructions) { 3092 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3093 3094 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3095 if (inst->src[j].file == PROGRAM_TEMPORARY && 3096 inst->src[j].index == index) { 3097 return (depth == 0) ? i : loop_start; 3098 } 3099 } 3100 3101 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3102 if(depth++ == 0) 3103 loop_start = i; 3104 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3105 if (--depth == 0) 3106 loop_start = -1; 3107 } 3108 assert(depth >= 0); 3109 3110 i++; 3111 } 3112 3113 return -1; 3114} 3115 3116int 3117glsl_to_tgsi_visitor::get_first_temp_write(int index) 3118{ 3119 int depth = 0; /* loop depth */ 3120 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3121 int i = 0; 3122 3123 foreach_iter(exec_list_iterator, iter, this->instructions) { 3124 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3125 3126 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3127 return (depth == 0) ? 
i : loop_start; 3128 } 3129 3130 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3131 if(depth++ == 0) 3132 loop_start = i; 3133 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3134 if (--depth == 0) 3135 loop_start = -1; 3136 } 3137 assert(depth >= 0); 3138 3139 i++; 3140 } 3141 3142 return -1; 3143} 3144 3145int 3146glsl_to_tgsi_visitor::get_last_temp_read(int index) 3147{ 3148 int depth = 0; /* loop depth */ 3149 int last = -1; /* index of last instruction that reads the temporary */ 3150 unsigned i = 0, j; 3151 3152 foreach_iter(exec_list_iterator, iter, this->instructions) { 3153 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3154 3155 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3156 if (inst->src[j].file == PROGRAM_TEMPORARY && 3157 inst->src[j].index == index) { 3158 last = (depth == 0) ? i : -2; 3159 } 3160 } 3161 3162 if (inst->op == TGSI_OPCODE_BGNLOOP) 3163 depth++; 3164 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3165 if (--depth == 0 && last == -2) 3166 last = i; 3167 assert(depth >= 0); 3168 3169 i++; 3170 } 3171 3172 assert(last >= -1); 3173 return last; 3174} 3175 3176int 3177glsl_to_tgsi_visitor::get_last_temp_write(int index) 3178{ 3179 int depth = 0; /* loop depth */ 3180 int last = -1; /* index of last instruction that writes to the temporary */ 3181 int i = 0; 3182 3183 foreach_iter(exec_list_iterator, iter, this->instructions) { 3184 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3185 3186 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) 3187 last = (depth == 0) ? 
i : -2;

      if (inst->op == TGSI_OPCODE_BGNLOOP)
         depth++;
      else if (inst->op == TGSI_OPCODE_ENDLOOP)
         /* Reads inside a loop (flagged -2 above) extend the live range to
          * the end of the outermost loop, since the loop may iterate. */
         if (--depth == 0 && last == -2)
            last = i;
      assert(depth >= 0);

      i++;
   }

   /* -1 means the register was never read at all. */
   assert(last >= -1);
   return last;
}

/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 * channels for copy propagation and updates following instructions to
 * use the original versions.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur. As an example, a TXP production before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 *
 * and after:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * which allows for dead code elimination on TEMP[1]'s writes.
 */
void
glsl_to_tgsi_visitor::copy_propagate(void)
{
   /* ACP ("available copy propagation") table: acp[4 * temp + chan] points at
    * the MOV instruction that last wrote that temp channel, or NULL if the
    * channel is not a known copy.  acp_level records the if-nesting level at
    * which each entry was recorded.
    */
   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
                                                  glsl_to_tgsi_instruction *,
                                                  this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      assert(inst->dst.file != PROGRAM_TEMPORARY
             || inst->dst.index < this->next_temp);

      /* First, do any copy propagation possible into the src regs.
       * (three srcs is the maximum any instruction here carries)
       */
      for (int r = 0; r < 3; r++) {
         glsl_to_tgsi_instruction *first = NULL;
         bool good = true;
         int acp_base = inst->src[r].index * 4;

         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               /* All referenced channels must originate from the same
                * register, since a src operand has a single file/index. */
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference.  Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;

            /* Compose this src's swizzle with each copy's swizzle. */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
                           (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_IF:
         ++level;
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         if (inst->op == TGSI_OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.
          */
         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
            /* Any temporary might be written, so no copy propagation
             * across this instruction.
             */
            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         } else if (inst->dst.file == PROGRAM_OUTPUT &&
                    inst->dst.reladdr) {
            /* Any output might be written, so no copy propagation
             * from outputs across this instruction.
             */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                     acp[4 * r + c] = NULL;
               }
            }
         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
                    inst->dst.file == PROGRAM_OUTPUT) {
            /* Clear where it's used as dst. */
            if (inst->dst.file == PROGRAM_TEMPORARY) {
               for (int c = 0; c < 4; c++) {
                  if (inst->dst.writemask & (1 << c)) {
                     acp[4 * inst->dst.index + c] = NULL;
                  }
               }
            }

            /* Clear where it's used as src: any recorded copy whose source
             * channel this instruction overwrites is no longer valid. */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
                      acp[4 * r + c]->src[0].index == inst->dst.index &&
                      inst->dst.writemask & (1 << src_chan))
                  {
                     acp[4 * r + c] = NULL;
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP.  Saturating, negated, or
       * indirectly addressed MOVs are not pure copies and are excluded.
       */
      if (inst->op == TGSI_OPCODE_MOV &&
          inst->dst.file == PROGRAM_TEMPORARY &&
          !inst->dst.reladdr &&
          !inst->saturate &&
          !inst->src[0].reladdr &&
          !inst->src[0].negate) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst.writemask & (1 << i)) {
               acp[4 * inst->dst.index + i] = inst;
               acp_level[4 * inst->dst.index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}

/*
 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.  As an example, a TXP production after copy propagation but
 * before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * and after this pass:
 *
 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
 */
void
glsl_to_tgsi_visitor::eliminate_dead_code(void)
{
   int i;

   /* For each temp, drop every write that occurs after its last read. */
   for (i=0; i < this->next_temp; i++) {
      int last_read = get_last_temp_read(i);
      int j = 0;

      foreach_iter(exec_list_iterator, iter, this->instructions) {
         glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
             j > last_read)
         {
            iter.remove();
            delete inst;
         }

         j++;
      }
   }
}

/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
 * code elimination.  This is less primitive than eliminate_dead_code(), as it
 * is per-channel and can detect consecutive writes without a read between them
 * as dead code.
However, there is some dead code that can be eliminated by
 * eliminate_dead_code() but not this function - for example, this function
 * cannot eliminate an instruction writing to a register that is never read and
 * is the only instruction writing to that register.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.
 */
int
glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
{
   /* writes[4 * temp + chan] is the pending (not-yet-read) write to that
    * channel; write_level records the if-nesting level of each entry.
    * Returns the number of instructions removed.
    */
   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
                                                     glsl_to_tgsi_instruction *,
                                                     this->next_temp * 4);
   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;
   int removed = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      assert(inst->dst.file != PROGRAM_TEMPORARY
             || inst->dst.index < this->next_temp);

      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
      case TGSI_OPCODE_CONT:
      case TGSI_OPCODE_BRK:
         /* End of a basic block, clear the write array entirely.
          *
          * This keeps us from killing dead code when the writes are
          * on either side of a loop, even when the register isn't touched
          * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
          * dead code of this type, so it shouldn't make a difference as long as
          * the dead code elimination pass in the GLSL compiler does its job.
          */
         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Promote the recorded level of all channels written inside the
          * preceding if or else block to the level above the if/else block.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!writes[4 * r + c])
                  continue;

               if (write_level[4 * r + c] == level)
                  write_level[4 * r + c] = level-1;
            }
         }

         if(inst->op == TGSI_OPCODE_ENDIF)
            --level;

         break;

      case TGSI_OPCODE_IF:
         ++level;
         /* fallthrough to default case to mark the condition as read */

      default:
         /* Continuing the block, clear any channels from the write array that
          * are read by this instruction.
          */
         for (unsigned i = 0; i < Elements(inst->src); i++) {
            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
               /* Any temporary might be read, so no dead code elimination
                * across this instruction.
                */
               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
               /* Clear where it's used as src: collect every channel the
                * swizzle can select, then drop those pending writes. */
               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);

               for (int c = 0; c < 4; c++) {
                  if (src_chans & (1 << c)) {
                     writes[4 * inst->src[i].index + c] = NULL;
                  }
               }
            }
         }
         break;
      }

      /* If this instruction writes to a temporary, add it to the write array.
       * If there is already an instruction in the write array for one or more
       * of the channels, flag that channel write as dead.
       */
      if (inst->dst.file == PROGRAM_TEMPORARY &&
          !inst->dst.reladdr &&
          !inst->saturate) {
         for (int c = 0; c < 4; c++) {
            if (inst->dst.writemask & (1 << c)) {
               if (writes[4 * inst->dst.index + c]) {
                  /* A write recorded at a shallower level may still be read
                   * on another branch, so only overwrites at the current
                   * level (or deeper, post-promotion) are provably dead. */
                  if (write_level[4 * inst->dst.index + c] < level)
                     continue;
                  else
                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
               }
               writes[4 * inst->dst.index + c] = inst;
               write_level[4 * inst->dst.index + c] = level;
            }
         }
      }
   }

   /* Anything still in the write array at this point is dead code. */
   for (int r = 0; r < this->next_temp; r++) {
      for (int c = 0; c < 4; c++) {
         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
         if (inst)
            inst->dead_mask |= (1 << c);
      }
   }

   /* Now actually remove the instructions that are completely dead and update
    * the writemask of other instructions with dead channels.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();

      if (!inst->dead_mask || !inst->dst.writemask)
         continue;
      else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
         iter.remove();
         delete inst;
         removed++;
      } else
         inst->dst.writemask &= ~(inst->dead_mask);
   }

   ralloc_free(write_level);
   ralloc_free(writes);

   return removed;
}

/* Merges temporary registers together where possible to reduce the number of
 * registers needed to run a program.
 *
 * Produces optimal code only after copy propagation and dead code elimination
 * have been run.
*/ 3590void 3591glsl_to_tgsi_visitor::merge_registers(void) 3592{ 3593 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); 3594 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); 3595 int i, j; 3596 3597 /* Read the indices of the last read and first write to each temp register 3598 * into an array so that we don't have to traverse the instruction list as 3599 * much. */ 3600 for (i=0; i < this->next_temp; i++) { 3601 last_reads[i] = get_last_temp_read(i); 3602 first_writes[i] = get_first_temp_write(i); 3603 } 3604 3605 /* Start looking for registers with non-overlapping usages that can be 3606 * merged together. */ 3607 for (i=0; i < this->next_temp; i++) { 3608 /* Don't touch unused registers. */ 3609 if (last_reads[i] < 0 || first_writes[i] < 0) continue; 3610 3611 for (j=0; j < this->next_temp; j++) { 3612 /* Don't touch unused registers. */ 3613 if (last_reads[j] < 0 || first_writes[j] < 0) continue; 3614 3615 /* We can merge the two registers if the first write to j is after or 3616 * in the same instruction as the last read from i. Note that the 3617 * register at index i will always be used earlier or at the same time 3618 * as the register at index j. */ 3619 if (first_writes[i] <= first_writes[j] && 3620 last_reads[i] <= first_writes[j]) 3621 { 3622 rename_temp_register(j, i); /* Replace all references to j with i.*/ 3623 3624 /* Update the first_writes and last_reads arrays with the new 3625 * values for the merged register index, and mark the newly unused 3626 * register index as such. */ 3627 last_reads[i] = last_reads[j]; 3628 first_writes[j] = -1; 3629 last_reads[j] = -1; 3630 } 3631 } 3632 } 3633 3634 ralloc_free(last_reads); 3635 ralloc_free(first_writes); 3636} 3637 3638/* Reassign indices to temporary registers by reusing unused indices created 3639 * by optimization passes. 
 */
void
glsl_to_tgsi_visitor::renumber_registers(void)
{
   int i = 0;
   int new_index = 0;

   /* Compact indices: registers that are never read keep no index at all
    * (their writes are presumably gone after dead code elimination —
    * TODO confirm this pass only runs after DCE). */
   for (i=0; i < this->next_temp; i++) {
      if (get_first_temp_read(i) < 0) continue;
      if (i != new_index)
         rename_temp_register(i, new_index);
      new_index++;
   }

   this->next_temp = new_index;
}

/**
 * Returns a fragment program which implements the current pixel transfer ops.
 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
 */
extern "C" void
get_pixel_transfer_visitor(struct st_fragment_program *fp,
                           glsl_to_tgsi_visitor *original,
                           int scale_and_bias, int pixel_maps)
{
   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
   struct st_context *st = st_context(original->ctx);
   struct gl_program *prog = &fp->Base.Base;
   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
   st_src_reg coord, src0;
   st_dst_reg dst0;
   glsl_to_tgsi_instruction *inst;

   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
   v->ctx = original->ctx;
   v->prog = prog;
   v->shader_program = NULL;
   v->glsl_version = original->glsl_version;
   v->native_integers = original->native_integers;
   v->options = original->options;
   v->next_temp = original->next_temp;
   v->num_address_regs = original->num_address_regs;
   v->samplers_used = prog->SamplersUsed = original->samplers_used;
   v->indirect_addr_temps = original->indirect_addr_temps;
   v->indirect_addr_consts = original->indirect_addr_consts;
   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
   v->num_immediates = original->num_immediates;

   /*
    * Get initial pixel color from the texture.
    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
    */
   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
   src0 = v->get_temp(glsl_type::vec4_type);
   dst0 = st_dst_reg(src0);
   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
   inst->sampler = 0;
   inst->tex_target = TEXTURE_2D_INDEX;

   prog->InputsRead |= FRAG_BIT_TEX0;
   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
   v->samplers_used |= (1 << 0);

   if (scale_and_bias) {
      static const gl_state_index scale_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_SCALE,
           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
      static const gl_state_index bias_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_BIAS,
           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
      GLint scale_p, bias_p;
      st_src_reg scale, bias;

      scale_p = _mesa_add_state_reference(params, scale_state);
      bias_p = _mesa_add_state_reference(params, bias_state);

      /* MAD colorTemp, colorTemp, scale, bias; */
      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
   }

   if (pixel_maps) {
      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
      st_dst_reg temp_dst = st_dst_reg(temp);

      assert(st->pixel_xfer.pixelmap_texture);

      /* With a little effort, we can do four pixel map look-ups with
       * two TEX instructions:
       */

      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
      inst->sampler = 1;
      inst->tex_target = TEXTURE_2D_INDEX;

      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
      inst->sampler = 1;
      inst->tex_target = TEXTURE_2D_INDEX;

      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
      v->samplers_used |= (1 << 1);

      /* MOV colorTemp, temp; */
      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
   }

   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
    * new visitor, rewriting reads of COL0 to read the transferred color in
    * colorTemp instead. */
   foreach_iter(exec_list_iterator, iter, original->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
      glsl_to_tgsi_instruction *newinst;
      st_src_reg src_regs[3];

      if (inst->dst.file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);

      for (int i=0; i<3; i++) {
         src_regs[i] = inst->src[i];
         if (src_regs[i].file == PROGRAM_INPUT &&
             src_regs[i].index == FRAG_ATTRIB_COL0)
         {
            src_regs[i].file = PROGRAM_TEMPORARY;
            src_regs[i].index = src0.index;
         }
         else if (src_regs[i].file == PROGRAM_INPUT)
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
      newinst->tex_target = inst->tex_target;
   }

   /* Make modifications to fragment program info. */
   prog->Parameters = _mesa_combine_parameter_lists(params,
                                                    original->prog->Parameters);
   _mesa_free_parameter_list(params);
   count_resources(v, prog);
   fp->glsl_to_tgsi = v;
}

/**
 * Make fragment program for glBitmap:
 * Sample the texture and kill the fragment if the bit is 0.
 * This program will be combined with the user's fragment program.
 *
 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
 */
extern "C" void
get_bitmap_visitor(struct st_fragment_program *fp,
                   glsl_to_tgsi_visitor *original, int samplerIndex)
{
   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
   struct st_context *st = st_context(original->ctx);
   struct gl_program *prog = &fp->Base.Base;
   st_src_reg coord, src0;
   st_dst_reg dst0;
   glsl_to_tgsi_instruction *inst;

   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
   v->ctx = original->ctx;
   v->prog = prog;
   v->shader_program = NULL;
   v->glsl_version = original->glsl_version;
   v->native_integers = original->native_integers;
   v->options = original->options;
   v->next_temp = original->next_temp;
   v->num_address_regs = original->num_address_regs;
   v->samplers_used = prog->SamplersUsed = original->samplers_used;
   v->indirect_addr_temps = original->indirect_addr_temps;
   v->indirect_addr_consts = original->indirect_addr_consts;
   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
   v->num_immediates = original->num_immediates;

   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
   src0 = v->get_temp(glsl_type::vec4_type);
   dst0 = st_dst_reg(src0);
   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
   inst->sampler = samplerIndex;
   inst->tex_target = TEXTURE_2D_INDEX;

   prog->InputsRead |= FRAG_BIT_TEX0;
   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
   v->samplers_used |= (1 << samplerIndex);

   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
   src0.negate = NEGATE_XYZW;
   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
      src0.swizzle = SWIZZLE_XXXX;
   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);

   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
    * new visitor.
    */
   foreach_iter(exec_list_iterator, iter, original->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
      glsl_to_tgsi_instruction *newinst;
      st_src_reg src_regs[3];

      if (inst->dst.file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);

      for (int i=0; i<3; i++) {
         src_regs[i] = inst->src[i];
         if (src_regs[i].file == PROGRAM_INPUT)
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
      newinst->tex_target = inst->tex_target;
   }

   /* Make modifications to fragment program info. */
   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
   count_resources(v, prog);
   fp->glsl_to_tgsi = v;
}

/* ------------------------- TGSI conversion stuff -------------------------- */
/* A pending branch target: 'token' is patched once the TGSI instruction
 * number of 'branch_target' is known. */
struct label {
   unsigned branch_target;
   unsigned token;
};

/**
 * Intermediate state used during shader translation.
 */
struct st_translate {
   struct ureg_program *ureg;

   struct ureg_dst temps[MAX_TEMPS];
   struct ureg_src *constants;
   struct ureg_src *immediates;
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   struct ureg_dst address[1];
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];

   const GLuint *inputMapping;
   const GLuint *outputMapping;

   /* For every instruction that contains a label (eg CALL), keep
    * details so that we can go back afterwards and emit the correct
    * tgsi instruction number for each label.
    */
   struct label *labels;
   unsigned labels_size;
   unsigned labels_count;

   /* Keep a record of the tgsi instruction number that each mesa
    * instruction starts at, will be used to fix up labels after
    * translation.
    */
   unsigned *insn;
   unsigned insn_size;
   unsigned insn_count;

   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */

   boolean error;
};

/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
   TGSI_SEMANTIC_FACE,
   TGSI_SEMANTIC_VERTEXID,
   TGSI_SEMANTIC_INSTANCEID
};

/**
 * Make note of a branch to a label in the TGSI code.
 * After we've emitted all instructions, we'll go over the list
 * of labels built here and patch the TGSI code with the actual
 * location of each label.
 */
static unsigned *get_label(struct st_translate *t, unsigned branch_target)
{
   unsigned i;

   /* Grow the label array geometrically (doubling) when full. */
   if (t->labels_count + 1 >= t->labels_size) {
      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
      t->labels = (struct label *)realloc(t->labels,
                                          t->labels_size * sizeof(struct label));
      if (t->labels == NULL) {
         /* On allocation failure, flag the error and hand back a dummy slot
          * so the caller can proceed without a NULL check. */
         static unsigned dummy;
         t->error = TRUE;
         return &dummy;
      }
   }

   i = t->labels_count++;
   t->labels[i].branch_target = branch_target;
   return &t->labels[i].token;
}

/**
 * Called prior to emitting the TGSI code for each instruction.
 * Allocate additional space for instructions if needed.
 * Update the insn[] array so the next glsl_to_tgsi_instruction points to
 * the next TGSI instruction.
 */
static void set_insn_start(struct st_translate *t, unsigned start)
{
   /* Grow the insn array geometrically (doubling) when full. */
   if (t->insn_count + 1 >= t->insn_size) {
      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
      if (t->insn == NULL) {
         /* Flag the failure; translation is aborted by the caller. */
         t->error = TRUE;
         return;
      }
   }

   t->insn[t->insn_count++] = start;
}

/**
 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
 */
static struct ureg_src
emit_immediate(struct st_translate *t,
               gl_constant_value values[4],
               int type, int size)
{
   struct ureg_program *ureg = t->ureg;

   /* Pick the ureg immediate declaration matching the GL base type. */
   switch(type)
   {
   case GL_FLOAT:
      return ureg_DECL_immediate(ureg, &values[0].f, size);
   case GL_INT:
      return ureg_DECL_immediate_int(ureg, &values[0].i, size);
   case GL_UNSIGNED_INT:
   case GL_BOOL:
      return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
   default:
      assert(!"should not get here - type must be float, int, uint, or bool");
      return ureg_src_undef();
   }
}

/**
 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
 */
static struct ureg_dst
dst_register(struct st_translate *t,
             gl_register_file file,
             GLuint index)
{
   switch(file) {
   case PROGRAM_UNDEFINED:
      return ureg_dst_undef();

   case PROGRAM_TEMPORARY:
      /* Declare TGSI temporaries lazily, on first use. */
      if (ureg_dst_is_undef(t->temps[index]))
         t->temps[index] = ureg_DECL_local_temporary(t->ureg);

      return t->temps[index];

   case PROGRAM_OUTPUT:
      if (t->procType == TGSI_PROCESSOR_VERTEX)
         assert(index < VERT_RESULT_MAX);
      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
         assert(index < FRAG_RESULT_MAX);
      else
         assert(index < GEOM_RESULT_MAX);

      assert(t->outputMapping[index] < Elements(t->outputs));

      /* Outputs go through the index remapping table. */
      return t->outputs[t->outputMapping[index]];

   case PROGRAM_ADDRESS:
      return t->address[index];

   default:
      assert(!"unknown dst register file");
      return ureg_dst_undef();
   }
}

/**
 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
 */
static struct ureg_src
src_register(struct st_translate *t,
             gl_register_file file,
             GLint index)
{
   switch(file) {
   case PROGRAM_UNDEFINED:
      return ureg_src_undef();

   case PROGRAM_TEMPORARY:
      assert(index >= 0);
      assert(index < (int) Elements(t->temps));
      /* Declare TGSI temporaries lazily, on first use. */
      if (ureg_dst_is_undef(t->temps[index]))
         t->temps[index] = ureg_DECL_local_temporary(t->ureg);
      return ureg_src(t->temps[index]);

   case PROGRAM_NAMED_PARAM:
   case PROGRAM_ENV_PARAM:
   case PROGRAM_LOCAL_PARAM:
   case PROGRAM_UNIFORM:
      assert(index >= 0);
      return t->constants[index];
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT:       /* ie, immediate */
      /* A negative index denotes a relative-addressed constant; declare
       * constant 0 as a placeholder (translate_src() restores the real
       * index for non-input/output files). */
      if (index < 0)
         return ureg_DECL_constant(t->ureg, 0);
      else
         return t->constants[index];

   case PROGRAM_IMMEDIATE:
      return t->immediates[index];

   case PROGRAM_INPUT:
      assert(t->inputMapping[index] < Elements(t->inputs));
      return t->inputs[t->inputMapping[index]];

   case PROGRAM_OUTPUT:
      assert(t->outputMapping[index] < Elements(t->outputs));
      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */

   case PROGRAM_ADDRESS:
      return ureg_src(t->address[index]);

   case PROGRAM_SYSTEM_VALUE:
      assert(index < (int) Elements(t->systemValues));
      return t->systemValues[index];

   default:
      assert(!"unknown src register file");
      return ureg_src_undef();
   }
}

/**
 * Create a TGSI ureg_dst register from an st_dst_reg.
4083 */ 4084static struct ureg_dst 4085translate_dst(struct st_translate *t, 4086 const st_dst_reg *dst_reg, 4087 bool saturate, bool clamp_color) 4088{ 4089 struct ureg_dst dst = dst_register(t, 4090 dst_reg->file, 4091 dst_reg->index); 4092 4093 dst = ureg_writemask(dst, dst_reg->writemask); 4094 4095 if (saturate) 4096 dst = ureg_saturate(dst); 4097 else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) { 4098 /* Clamp colors for ARB_color_buffer_float. */ 4099 switch (t->procType) { 4100 case TGSI_PROCESSOR_VERTEX: 4101 /* XXX if the geometry shader is present, this must be done there 4102 * instead of here. */ 4103 if (dst_reg->index == VERT_RESULT_COL0 || 4104 dst_reg->index == VERT_RESULT_COL1 || 4105 dst_reg->index == VERT_RESULT_BFC0 || 4106 dst_reg->index == VERT_RESULT_BFC1) { 4107 dst = ureg_saturate(dst); 4108 } 4109 break; 4110 4111 case TGSI_PROCESSOR_FRAGMENT: 4112 if (dst_reg->index >= FRAG_RESULT_COLOR) { 4113 dst = ureg_saturate(dst); 4114 } 4115 break; 4116 } 4117 } 4118 4119 if (dst_reg->reladdr != NULL) 4120 dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); 4121 4122 return dst; 4123} 4124 4125/** 4126 * Create a TGSI ureg_src register from an st_src_reg. 4127 */ 4128static struct ureg_src 4129translate_src(struct st_translate *t, const st_src_reg *src_reg) 4130{ 4131 struct ureg_src src = src_register(t, src_reg->file, src_reg->index); 4132 4133 src = ureg_swizzle(src, 4134 GET_SWZ(src_reg->swizzle, 0) & 0x3, 4135 GET_SWZ(src_reg->swizzle, 1) & 0x3, 4136 GET_SWZ(src_reg->swizzle, 2) & 0x3, 4137 GET_SWZ(src_reg->swizzle, 3) & 0x3); 4138 4139 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 4140 src = ureg_negate(src); 4141 4142 if (src_reg->reladdr != NULL) { 4143 /* Normally ureg_src_indirect() would be used here, but a stupid compiler 4144 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 4145 * set the bit for src.Negate. So we have to do the operation manually 4146 * here to work around the compiler's problems. 
*/ 4147 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ 4148 struct ureg_src addr = ureg_src(t->address[0]); 4149 src.Indirect = 1; 4150 src.IndirectFile = addr.File; 4151 src.IndirectIndex = addr.Index; 4152 src.IndirectSwizzle = addr.SwizzleX; 4153 4154 if (src_reg->file != PROGRAM_INPUT && 4155 src_reg->file != PROGRAM_OUTPUT) { 4156 /* If src_reg->index was negative, it was set to zero in 4157 * src_register(). Reassign it now. But don't do this 4158 * for input/output regs since they get remapped while 4159 * const buffers don't. 4160 */ 4161 src.Index = src_reg->index; 4162 } 4163 } 4164 4165 return src; 4166} 4167 4168static struct tgsi_texture_offset 4169translate_tex_offset(struct st_translate *t, 4170 const struct tgsi_texture_offset *in_offset) 4171{ 4172 struct tgsi_texture_offset offset; 4173 4174 assert(in_offset->File == PROGRAM_IMMEDIATE); 4175 4176 offset.File = TGSI_FILE_IMMEDIATE; 4177 offset.Index = in_offset->Index; 4178 offset.SwizzleX = in_offset->SwizzleX; 4179 offset.SwizzleY = in_offset->SwizzleY; 4180 offset.SwizzleZ = in_offset->SwizzleZ; 4181 offset.Padding = 0; 4182 4183 return offset; 4184} 4185 4186static void 4187compile_tgsi_instruction(struct st_translate *t, 4188 const glsl_to_tgsi_instruction *inst, 4189 bool clamp_dst_color_output) 4190{ 4191 struct ureg_program *ureg = t->ureg; 4192 GLuint i; 4193 struct ureg_dst dst[1]; 4194 struct ureg_src src[4]; 4195 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; 4196 4197 unsigned num_dst; 4198 unsigned num_src; 4199 4200 num_dst = num_inst_dst_regs(inst->op); 4201 num_src = num_inst_src_regs(inst->op); 4202 4203 if (num_dst) 4204 dst[0] = translate_dst(t, 4205 &inst->dst, 4206 inst->saturate, 4207 clamp_dst_color_output); 4208 4209 for (i = 0; i < num_src; i++) 4210 src[i] = translate_src(t, &inst->src[i]); 4211 4212 switch(inst->op) { 4213 case TGSI_OPCODE_BGNLOOP: 4214 case TGSI_OPCODE_CAL: 4215 case TGSI_OPCODE_ELSE: 4216 case TGSI_OPCODE_ENDLOOP: 4217 case 
   TGSI_OPCODE_IF:
      /* Flow-control / call opcodes carry a branch target instead of a
       * destination register.  Emit them as label instructions; the label
       * token is patched later by ureg_fixup_label() in
       * st_translate_program().  For CAL the target is the callee's
       * signature id; for the other branch opcodes get_label() records a
       * placeholder target of 0 to be fixed up.
       */
      assert(num_dst == 0);
      ureg_label_insn(ureg,
                      inst->op,
                      src, num_src,
                      get_label(t,
                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
      return;

   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_TXF:
      /* Texture opcodes take the sampler as an extra trailing source and
       * may carry texel offsets; both are translated here.
       */
      src[num_src++] = t->samplers[inst->sampler];
      for (i = 0; i < inst->tex_offset_num_offset; i++) {
         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
      }
      ureg_tex_insn(ureg,
                    inst->op,
                    dst, num_dst,
                    st_translate_texture_target(inst->tex_target, inst->tex_shadow),
                    texoffsets, inst->tex_offset_num_offset,
                    src, num_src);
      return;

   case TGSI_OPCODE_SCS:
      /* SCS only defines the x (cos) and y (sin) result components, so
       * restrict the writemask accordingly.
       */
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
      break;

   default:
      ureg_insn(ureg,
                inst->op,
                dst, num_dst,
                src, num_src);
      break;
   }
}

/**
 * Emit the TGSI instructions for inverting and adjusting WPOS.
 * This code is unavoidable because it also depends on whether
 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
 *
 * \param t        translation context; t->inputs[WPOS] is redirected to the
 *                 adjusted temporary on return
 * \param program  incoming Mesa program; a state reference is ADDED to its
 *                 parameter list here (see XXX note below)
 * \param invert   whether a conditional y-flip must be emitted
 * \param adjX     constant shift applied to the x coordinate
 * \param adjY     y shifts for the no-inversion (adjY[0]) and inversion
 *                 (adjY[1]) cases
 */
static void
emit_wpos_adjustment( struct st_translate *t,
                      const struct gl_program *program,
                      boolean invert,
                      GLfloat adjX, GLfloat adjY[2])
{
   struct ureg_program *ureg = t->ureg;

   /* Fragment program uses fragment position input.
    * Need to replace instances of INPUT[WPOS] with temp T
    * where T = INPUT[WPOS] but y is inverted.
    */
   static const gl_state_index wposTransformState[STATE_LENGTH]
      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
          (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };

   /* XXX: note we are modifying the incoming shader here!  Need to
    * do this before emitting the constant decls below, or this
    * will be missed:
    */
   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
                                                       wposTransformState);

   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];

   /* First, apply the coordinate shift: */
   if (adjX || adjY[0] || adjY[1]) {
      if (adjY[0] != adjY[1]) {
         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
          * depending on whether inversion is actually going to be applied
          * or not, which is determined by testing against the inversion
          * state variable used below, which will be either +1 or -1.
          */
         struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);

         ureg_CMP(ureg, adj_temp,
                  ureg_scalar(wpostrans, invert ? 2 : 0),
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
      } else {
         ureg_ADD(ureg, wpos_temp, wpos_input,
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
      }
      wpos_input = ureg_src(wpos_temp);
   } else {
      /* MOV wpos_temp, input[wpos]
       */
      ureg_MOV( ureg, wpos_temp, wpos_input );
   }

   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
    * inversion/identity, or the other way around if we're drawing to an FBO.
    */
   if (invert) {
      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 0),
                ureg_scalar(wpostrans, 1));
   } else {
      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 2),
                ureg_scalar(wpostrans, 3));
   }

   /* Use wpos_temp as position input from here on:
    */
   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
}


/**
 * Emit fragment position/coordinate code.
 *
 * Queries the driver's supported pixel-center and origin conventions and
 * computes the adjX/adjY/invert arguments for emit_wpos_adjustment().
 */
static void
emit_wpos(struct st_context *st,
          struct st_translate *t,
          const struct gl_program *program,
          struct ureg_program *ureg)
{
   const struct gl_fragment_program *fp =
      (const struct gl_fragment_program *) program;
   struct pipe_screen *pscreen = st->pipe->screen;
   GLfloat adjX = 0.0f;
   GLfloat adjY[2] = { 0.0f, 0.0f };
   boolean invert = FALSE;

   /* Query the pixel center conventions supported by the pipe driver and set
    * adjX, adjY to help out if it cannot handle the requested one internally.
    *
    * The bias of the y-coordinate depends on whether y-inversion takes place
    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
    * drawing to an FBO (causes additional inversion), and whether the pipe
    * driver origin and the requested origin differ (the latter condition is
    * stored in the 'invert' variable).
    *
    * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
    *
    * center shift only:
    * i -> h: +0.5
    * h -> i: -0.5
    *
    * inversion only:
    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
    *
    * inversion and center shift:
    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
    */
   if (fp->OriginUpperLeft) {
      /* Fragment shader wants origin in upper-left */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
         /* the driver supports upper-left origin */
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
         /* the driver supports lower-left origin, need to invert Y */
         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
         invert = TRUE;
      }
      else
         assert(0);
   }
   else {
      /* Fragment shader wants origin in lower-left */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
         /* the driver supports lower-left origin */
         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
         /* the driver supports upper-left origin, need to invert Y */
         invert = TRUE;
      else
         assert(0);
   }

   if (fp->PixelCenterInteger) {
      /* Fragment shader wants pixel center integer */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
         /* the driver supports pixel center integer */
         adjY[1] = 1.0f;
         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
         /* the driver supports pixel center half integer, need to bias X,Y */
         adjX = -0.5f;
         adjY[0] = -0.5f;
         adjY[1] = 0.5f;
      }
      else
         assert(0);
   }
   else {
      /* Fragment shader wants pixel center half integer */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
         /* the driver supports pixel center half integer */
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
         /* the driver supports pixel center integer, need to bias X,Y */
         adjX = adjY[0] = adjY[1] = 0.5f;
         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
      }
      else
         assert(0);
   }

   /* we invert after adjustment so that we avoid the MOV to temporary,
    * and reuse the adjustment ADD instead */
   emit_wpos_adjustment(t, program, invert, adjX, adjY);
}

/**
 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
 * TGSI uses +1 for front, -1 for back.
 * This function converts the TGSI value to the GL value.  Simply clamping/
 * saturating the value to [0,1] does the job.
 */
static void
emit_face_var(struct st_translate *t)
{
   struct ureg_program *ureg = t->ureg;
   struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];

   /* MOV_SAT face_temp, input[face] */
   face_temp = ureg_saturate(face_temp);
   ureg_MOV(ureg, face_temp, face_input);

   /* Use face_temp as face input from here on: */
   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
}

/**
 * Copy the vertex edge-flag input attribute through to the edge-flag
 * output register (MOV out[EDGE], in[EDGEFLAG]).
 */
static void
emit_edgeflags(struct st_translate *t)
{
   struct ureg_program *ureg = t->ureg;
   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];

   ureg_MOV(ureg, edge_dst, edge_src);
}

/**
 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
 * \param program  the program to translate
 * \param numInputs  number of input registers used
 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
 *                      input indexes
 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
 *                            each input
 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
 * \param numOutputs  number of output registers used
 * \param outputMapping  maps Mesa fragment program outputs to TGSI
 *                       generic outputs
 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
 *                             each output
 *
 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
 */
extern "C" enum pipe_error
st_translate_program(
   struct gl_context *ctx,
   uint procType,
   struct ureg_program *ureg,
   glsl_to_tgsi_visitor *program,
   const struct gl_program *proginfo,
   GLuint numInputs,
   const GLuint inputMapping[],
   const ubyte inputSemanticName[],
   const ubyte inputSemanticIndex[],
   const GLuint interpMode[],
   const GLboolean is_centroid[],
   GLuint numOutputs,
   const GLuint outputMapping[],
   const ubyte outputSemanticName[],
   const ubyte outputSemanticIndex[],
   boolean passthrough_edgeflags,
   boolean clamp_color)
{
   struct st_translate *t;
   unsigned i;
   enum pipe_error ret = PIPE_OK;

   /* Elements() only takes sizeof of the member arrays, so using the
    * still-unassigned 't' here is safe.
    */
   assert(numInputs <= Elements(t->inputs));
   assert(numOutputs <= Elements(t->outputs));

   t = CALLOC_STRUCT(st_translate);
   if (!t) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto out;
   }

   /* NOTE(review): redundant — CALLOC_STRUCT already zero-fills *t. */
   memset(t, 0, sizeof *t);

   t->procType = procType;
   t->inputMapping = inputMapping;
   t->outputMapping = outputMapping;
   t->ureg = ureg;

   /* Detach any previously-established uniform driver storage before we
    * re-create the linkage at the bottom of this function.
    */
   if (program->shader_program) {
      for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
         struct gl_uniform_storage *const storage =
            &program->shader_program->UniformStorage[i];

         _mesa_uniform_detach_all_driver_storage(storage);
      }
   }

   /*
    * Declare input attributes.
    */
   if (procType == TGSI_PROCESSOR_FRAGMENT) {
      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
                                                        inputSemanticName[i],
                                                        inputSemanticIndex[i],
                                                        interpMode[i], 0,
                                                        is_centroid[i]);
      }

      if (proginfo->InputsRead & FRAG_BIT_WPOS) {
         /* Must do this after setting up t->inputs, and before
          * emitting constant references, below:
          */
         emit_wpos(st_context(ctx), t, proginfo, ureg);
      }

      if (proginfo->InputsRead & FRAG_BIT_FACE)
         emit_face_var(t);

      /*
       * Declare output attributes.
       */
      for (i = 0; i < numOutputs; i++) {
         switch (outputSemanticName[i]) {
         case TGSI_SEMANTIC_POSITION:
            t->outputs[i] = ureg_DECL_output(ureg,
                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
                                             outputSemanticIndex[i]);
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
            break;
         case TGSI_SEMANTIC_STENCIL:
            t->outputs[i] = ureg_DECL_output(ureg,
                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
                                             outputSemanticIndex[i]);
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
            break;
         case TGSI_SEMANTIC_COLOR:
            t->outputs[i] = ureg_DECL_output(ureg,
                                             TGSI_SEMANTIC_COLOR,
                                             outputSemanticIndex[i]);
            break;
         default:
            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
            ret = PIPE_ERROR_BAD_INPUT;
            goto out;
         }
      }
   }
   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_gs_input(ureg,
                                           i,
                                           inputSemanticName[i],
                                           inputSemanticIndex[i]);
      }

      for (i = 0; i < numOutputs; i++) {
         t->outputs[i] = ureg_DECL_output(ureg,
                                          outputSemanticName[i],
                                          outputSemanticIndex[i]);
      }
   }
   else {
      assert(procType == TGSI_PROCESSOR_VERTEX);

      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
      }

      for (i = 0; i < numOutputs; i++) {
         t->outputs[i] = ureg_DECL_output(ureg,
                                          outputSemanticName[i],
                                          outputSemanticIndex[i]);
      }
      if (passthrough_edgeflags)
         emit_edgeflags(t);
   }

   /* Declare address register.
    */
   if (program->num_address_regs > 0) {
      assert(program->num_address_regs == 1);
      t->address[0] = ureg_DECL_address(ureg);
   }

   /* Declare misc input registers
    */
   {
      GLbitfield sysInputs = proginfo->SystemValuesRead;
      unsigned numSys = 0;
      /* Iterate over the set bits; sysInputs is cleared bit-by-bit below. */
      for (i = 0; sysInputs; i++) {
         if (sysInputs & (1 << i)) {
            unsigned semName = mesa_sysval_to_semantic[i];
            t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
            if (semName == TGSI_SEMANTIC_INSTANCEID ||
                semName == TGSI_SEMANTIC_VERTEXID) {
               /* From Gallium perspective, these system values are always
                * integer, and require native integer support.  However, if
                * native integer is supported on the vertex stage but not the
                * pixel stage (e.g, i915g + draw), Mesa will generate IR that
                * assumes these system values are floats. To resolve the
                * inconsistency, we insert a U2F.
                */
               struct st_context *st = st_context(ctx);
               struct pipe_screen *pscreen = st->pipe->screen;
               assert(procType == TGSI_PROCESSOR_VERTEX);
               assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
               if (!ctx->Const.NativeIntegers) {
                  struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
                  ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
                  t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
               }
            }
            numSys++;
            sysInputs &= ~(1 << i);
         }
      }
   }

   if (program->indirect_addr_temps) {
      /* If temps are accessed with indirect addressing, declare temporaries
       * in sequential order.  Else, we declare them on demand elsewhere.
       * (Note: the number of temporaries is equal to program->next_temp)
       */
      for (i = 0; i < (unsigned)program->next_temp; i++) {
         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
         t->temps[i] = ureg_DECL_local_temporary(t->ureg);
      }
   }

   /* Emit constants and uniforms.  TGSI uses a single index space for these,
    * so we put all the translated regs in t->constants.
    */
   if (proginfo->Parameters) {
      t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
      if (t->constants == NULL) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto out;
      }

      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
         switch (proginfo->Parameters->Parameters[i].Type) {
         case PROGRAM_ENV_PARAM:
         case PROGRAM_LOCAL_PARAM:
         case PROGRAM_STATE_VAR:
         case PROGRAM_NAMED_PARAM:
         case PROGRAM_UNIFORM:
            t->constants[i] = ureg_DECL_constant(ureg, i);
            break;

         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
          * addressing of the const buffer.
          * FIXME: Be smarter and recognize param arrays:
          * indirect addressing is only valid within the referenced
          * array.
          */
         case PROGRAM_CONSTANT:
            if (program->indirect_addr_consts)
               t->constants[i] = ureg_DECL_constant(ureg, i);
            else
               t->constants[i] = emit_immediate(t,
                                                proginfo->Parameters->ParameterValues[i],
                                                proginfo->Parameters->Parameters[i].DataType,
                                                4);
            break;
         default:
            break;
         }
      }
   }

   /* Emit immediate values.
    */
   t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
   if (t->immediates == NULL) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto out;
   }
   i = 0;
   foreach_iter(exec_list_iterator, iter, program->immediates) {
      immediate_storage *imm = (immediate_storage *)iter.get();
      assert(i < program->num_immediates);
      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
   }
   assert(i == program->num_immediates);

   /* texture samplers */
   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
      if (program->samplers_used & (1 << i)) {
         t->samplers[i] = ureg_DECL_sampler(ureg, i);
      }
   }

   /* Emit each instruction in turn:
    */
   foreach_iter(exec_list_iterator, iter, program->instructions) {
      set_insn_start(t, ureg_get_instruction_number(ureg));
      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(),
                               clamp_color);
   }

   /* Fix up all emitted labels:
    */
   for (i = 0; i < t->labels_count; i++) {
      ureg_fixup_label(ureg, t->labels[i].token,
                       t->insn[t->labels[i].branch_target]);
   }

   if (program->shader_program) {
      /* This has to be done last.  Any operation that can cause
       * prog->ParameterValues to get reallocated (e.g., anything that adds a
       * program constant) has to happen before creating this linkage.
       */
      for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
         if (program->shader_program->_LinkedShaders[i] == NULL)
            continue;

         _mesa_associate_uniform_storage(ctx, program->shader_program,
               program->shader_program->_LinkedShaders[i]->Program->Parameters);
      }
   }

out:
   /* FREE(NULL) is a no-op, so unconditionally free everything we may
    * have allocated on t.
    */
   if (t) {
      FREE(t->insn);
      FREE(t->labels);
      FREE(t->constants);
      FREE(t->immediates);

      if (t->error) {
         debug_printf("%s: translate error flag set\n", __FUNCTION__);
      }

      FREE(t);
   }

   return ret;
}
/* ----------------------------- End TGSI code ------------------------------ */

/**
 * Convert a shader's GLSL IR into a Mesa gl_program, although without
 * generating Mesa IR.
 */
static struct gl_program *
get_mesa_program(struct gl_context *ctx,
                 struct gl_shader_program *shader_program,
                 struct gl_shader *shader)
{
   glsl_to_tgsi_visitor* v;
   struct gl_program *prog;
   GLenum target;
   const char *target_string;
   bool progress;
   struct gl_shader_compiler_options *options =
      &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];

   /* Map the GLSL shader stage to the corresponding program target enum. */
   switch (shader->Type) {
   case GL_VERTEX_SHADER:
      target = GL_VERTEX_PROGRAM_ARB;
      target_string = "vertex";
      break;
   case GL_FRAGMENT_SHADER:
      target = GL_FRAGMENT_PROGRAM_ARB;
      target_string = "fragment";
      break;
   case GL_GEOMETRY_SHADER:
      target = GL_GEOMETRY_PROGRAM_NV;
      target_string = "geometry";
      break;
   default:
      assert(!"should not be reached");
      return NULL;
   }

   validate_ir_tree(shader->ir);

   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
   if (!prog)
      return NULL;
   prog->Parameters = _mesa_new_parameter_list();
   v = new glsl_to_tgsi_visitor();
   v->ctx = ctx;
   v->prog = prog;
   v->shader_program = shader_program;
   v->options = options;
   v->glsl_version = ctx->Const.GLSLVersion;
   v->native_integers = ctx->Const.NativeIntegers;

   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
                                               prog->Parameters);

   /* Remove reads from output registers. */
   lower_output_reads(shader->ir);

   /* Emit intermediate IR for main(). */
   visit_exec_list(shader->ir, v);

   /* Now emit bodies for any functions that were used.  Repeat until a full
    * pass adds no new bodies, since emitting one body can discover calls to
    * further functions.
    */
   do {
      progress = GL_FALSE;

      foreach_iter(exec_list_iterator, iter, v->function_signatures) {
         function_entry *entry = (function_entry *)iter.get();

         if (!entry->bgn_inst) {
            v->current_function = entry;

            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
            entry->bgn_inst->function = entry;

            visit_exec_list(&entry->sig->body, v);

            /* Ensure the subroutine ends with RET before ENDSUB. */
            glsl_to_tgsi_instruction *last;
            last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
            if (last->op != TGSI_OPCODE_RET)
               v->emit(NULL, TGSI_OPCODE_RET);

            glsl_to_tgsi_instruction *end;
            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
            end->function = entry;

            progress = GL_TRUE;
         }
      }
   } while (progress);

#if 0
   /* Print out some information (for debugging purposes) used by the
    * optimization passes. */
   for (i=0; i < v->next_temp; i++) {
      int fr = v->get_first_temp_read(i);
      int fw = v->get_first_temp_write(i);
      int lr = v->get_last_temp_read(i);
      int lw = v->get_last_temp_write(i);

      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
      assert(fw <= fr);
   }
#endif

   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
   v->simplify_cmp();
   v->copy_propagate();
   while (v->eliminate_dead_code_advanced());

   /* FIXME: These passes to optimize temporary registers don't work when there
    * is indirect addressing of the temporary register space.  We need proper
    * array support so that we don't have to give up these passes in every
    * shader that uses arrays.
    */
   if (!v->indirect_addr_temps) {
      v->eliminate_dead_code();
      v->merge_registers();
      v->renumber_registers();
   }

   /* Write the END instruction. */
   v->emit(NULL, TGSI_OPCODE_END);

   if (ctx->Shader.Flags & GLSL_DUMP) {
      printf("\n");
      printf("GLSL IR for linked %s program %d:\n", target_string,
             shader_program->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
      printf("\n");
      fflush(stdout);
   }

   prog->Instructions = NULL;
   prog->NumInstructions = 0;

   do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
   count_resources(v, prog);

   _mesa_reference_program(ctx, &shader->Program, prog);

   /* This has to be done last.  Any operation that can cause
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
   if (!shader_program->LinkStatus) {
      /* NOTE(review): this early return appears to leak 'prog' (referenced
       * into shader->Program above, but the local reference is dropped
       * without _mesa_reference_program(ctx, &prog, NULL)) and 'v' — confirm
       * against ownership conventions before changing.
       */
      return NULL;
   }

   struct st_vertex_program *stvp;
   struct st_fragment_program *stfp;
   struct st_geometry_program *stgp;

   /* Stash the visitor on the st program wrapper; the TGSI translation
    * proper happens later via st_translate_program().
    */
   switch (shader->Type) {
   case GL_VERTEX_SHADER:
      stvp = (struct st_vertex_program *)prog;
      stvp->glsl_to_tgsi = v;
      break;
   case GL_FRAGMENT_SHADER:
      stfp = (struct st_fragment_program *)prog;
      stfp->glsl_to_tgsi = v;
      break;
   case GL_GEOMETRY_SHADER:
      stgp = (struct st_geometry_program *)prog;
      stgp->glsl_to_tgsi = v;
      break;
   default:
      assert(!"should not be reached");
      return NULL;
   }

   return prog;
}

extern "C" {

/**
 * Allocate a new gl_shader object of the given type.
 * Called via ctx->Driver.NewShader().
 */
struct gl_shader *
st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
{
   struct gl_shader *shader;
   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
          type == GL_GEOMETRY_SHADER_ARB);
   shader = rzalloc(NULL, struct gl_shader);
   if (shader) {
      shader->Type = type;
      shader->Name = name;
      _mesa_init_shader(ctx, shader);
   }
   return shader;
}

/**
 * Allocate a new gl_shader_program object.
 * Called via ctx->Driver.NewShaderProgram().
 */
struct gl_shader_program *
st_new_shader_program(struct gl_context *ctx, GLuint name)
{
   struct gl_shader_program *shProg;
   shProg = rzalloc(NULL, struct gl_shader_program);
   if (shProg) {
      shProg->Name = name;
      _mesa_init_shader_program(ctx, shProg);
   }
   return shProg;
}

/**
 * Link a shader.
 * Called via ctx->Driver.LinkShader()
 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
 * with code lowering and other optimizations.
 */
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
   assert(prog->LinkStatus);

   /* First pass: run the IR lowering/optimization loop on every linked
    * shader stage until it reaches a fixed point.
    */
   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
         continue;

      bool progress;
      exec_list *ir = prog->_LinkedShaders[i]->ir;
      const struct gl_shader_compiler_options *options =
            &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];

      do {
         unsigned what_to_lower = MOD_TO_FRACT | DIV_TO_MUL_RCP |
            EXP_TO_EXP2 | LOG_TO_LOG2;
         if (options->EmitNoPow)
            what_to_lower |= POW_TO_EXP2;
         if (!ctx->Const.NativeIntegers)
            what_to_lower |= INT_DIV_TO_MUL_RCP;

         progress = false;

         /* Lowering */
         do_mat_op_to_vec(ir);
         lower_instructions(ir, what_to_lower);

         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;

         progress = do_common_optimization(ir, true, true,
                                           options->MaxUnrollIterations)
           || progress;

         progress = lower_quadop_vector(ir, false) || progress;

         if (options->MaxIfDepth == 0)
            progress = lower_discard(ir) || progress;

         progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;

         if (options->EmitNoNoise)
            progress = lower_noise(ir) || progress;

         /* If there are forms of indirect addressing that the driver
          * cannot handle, perform the lowering pass.
          */
         if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
             || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
           progress =
             lower_variable_index_to_cond_assign(ir,
                                                 options->EmitNoIndirectInput,
                                                 options->EmitNoIndirectOutput,
                                                 options->EmitNoIndirectTemp,
                                                 options->EmitNoIndirectUniform)
             || progress;

         progress = do_vec_index_to_cond_assign(ir) || progress;
      } while (progress);

      validate_ir_tree(ir);
   }

   /* Second pass: convert each stage's IR to a gl_program and notify the
    * driver.  On failure, drop the program reference and bail out.
    */
   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
      struct gl_program *linked_prog;

      if (prog->_LinkedShaders[i] == NULL)
         continue;

      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);

      if (linked_prog) {
         static const GLenum targets[] = {
            GL_VERTEX_PROGRAM_ARB,
            GL_FRAGMENT_PROGRAM_ARB,
            GL_GEOMETRY_PROGRAM_NV
         };

         _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
                                 linked_prog);
         if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
                                    NULL);
            _mesa_reference_program(ctx, &linked_prog, NULL);
            return GL_FALSE;
         }
      }

      _mesa_reference_program(ctx, &linked_prog, NULL);
   }

   return GL_TRUE;
}

/**
 * Translate Mesa's transform-feedback info into a pipe_stream_output_info,
 * remapping output register indices through outputMapping.
 */
void
st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
                                const GLuint outputMapping[],
                                struct pipe_stream_output_info *so)
{
   unsigned i;
   struct gl_transform_feedback_info *info =
      &glsl_to_tgsi->shader_program->LinkedTransformFeedback;

   for (i = 0; i < info->NumOutputs; i++) {
      so->output[i].register_index =
         outputMapping[info->Outputs[i].OutputRegister];
      so->output[i].start_component = info->Outputs[i].ComponentOffset;
      so->output[i].num_components = info->Outputs[i].NumComponents;
      so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so->output[i].dst_offset = info->Outputs[i].DstOffset;
   }

   for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so->stride[i] = info->BufferStride[i];
   }
   so->num_outputs = info->NumOutputs;
}

} /* extern "C" */