st_glsl_to_tgsi.cpp revision 367b83f890f6f7922bc8f9aa528ab50f55674e9e
1/* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright © 2010 Intel Corporation 5 * Copyright © 2011 Bryan Cain 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 */ 26 27/** 28 * \file glsl_to_tgsi.cpp 29 * 30 * Translate GLSL IR to TGSI. 
31 */ 32 33#include <stdio.h> 34#include "main/compiler.h" 35#include "ir.h" 36#include "ir_visitor.h" 37#include "ir_print_visitor.h" 38#include "ir_expression_flattening.h" 39#include "glsl_types.h" 40#include "glsl_parser_extras.h" 41#include "../glsl/program.h" 42#include "ir_optimization.h" 43#include "ast.h" 44 45#include "main/mtypes.h" 46#include "main/shaderobj.h" 47#include "program/hash_table.h" 48 49extern "C" { 50#include "main/shaderapi.h" 51#include "main/uniforms.h" 52#include "program/prog_instruction.h" 53#include "program/prog_optimize.h" 54#include "program/prog_print.h" 55#include "program/program.h" 56#include "program/prog_parameter.h" 57#include "program/sampler.h" 58 59#include "pipe/p_compiler.h" 60#include "pipe/p_context.h" 61#include "pipe/p_screen.h" 62#include "pipe/p_shader_tokens.h" 63#include "pipe/p_state.h" 64#include "util/u_math.h" 65#include "tgsi/tgsi_ureg.h" 66#include "tgsi/tgsi_info.h" 67#include "st_context.h" 68#include "st_program.h" 69#include "st_glsl_to_tgsi.h" 70#include "st_mesa_to_tgsi.h" 71} 72 73#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX 74#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 75 (1 << PROGRAM_ENV_PARAM) | \ 76 (1 << PROGRAM_STATE_VAR) | \ 77 (1 << PROGRAM_NAMED_PARAM) | \ 78 (1 << PROGRAM_CONSTANT) | \ 79 (1 << PROGRAM_UNIFORM)) 80 81/** 82 * Maximum number of temporary registers. 83 * 84 * It is too big for stack allocated arrays -- it will cause stack overflow on 85 * Windows and likely Mac OS X. 86 */ 87#define MAX_TEMPS 4096 88 89/* will be 4 for GLSL 4.00 */ 90#define MAX_GLSL_TEXTURE_OFFSET 1 91 92class st_src_reg; 93class st_dst_reg; 94 95static int swizzle_for_size(int size); 96 97/** 98 * This struct is a corresponding struct to TGSI ureg_src. 
99 */ 100class st_src_reg { 101public: 102 st_src_reg(gl_register_file file, int index, const glsl_type *type) 103 { 104 this->file = file; 105 this->index = index; 106 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 107 this->swizzle = swizzle_for_size(type->vector_elements); 108 else 109 this->swizzle = SWIZZLE_XYZW; 110 this->negate = 0; 111 this->type = type ? type->base_type : GLSL_TYPE_ERROR; 112 this->reladdr = NULL; 113 } 114 115 st_src_reg(gl_register_file file, int index, int type) 116 { 117 this->type = type; 118 this->file = file; 119 this->index = index; 120 this->swizzle = SWIZZLE_XYZW; 121 this->negate = 0; 122 this->reladdr = NULL; 123 } 124 125 st_src_reg() 126 { 127 this->type = GLSL_TYPE_ERROR; 128 this->file = PROGRAM_UNDEFINED; 129 this->index = 0; 130 this->swizzle = 0; 131 this->negate = 0; 132 this->reladdr = NULL; 133 } 134 135 explicit st_src_reg(st_dst_reg reg); 136 137 gl_register_file file; /**< PROGRAM_* from Mesa */ 138 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 139 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 140 int negate; /**< NEGATE_XYZW mask from mesa */ 141 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 142 /** Register index should be offset by the integer in this reg. 
*/ 143 st_src_reg *reladdr; 144}; 145 146class st_dst_reg { 147public: 148 st_dst_reg(gl_register_file file, int writemask, int type) 149 { 150 this->file = file; 151 this->index = 0; 152 this->writemask = writemask; 153 this->cond_mask = COND_TR; 154 this->reladdr = NULL; 155 this->type = type; 156 } 157 158 st_dst_reg() 159 { 160 this->type = GLSL_TYPE_ERROR; 161 this->file = PROGRAM_UNDEFINED; 162 this->index = 0; 163 this->writemask = 0; 164 this->cond_mask = COND_TR; 165 this->reladdr = NULL; 166 } 167 168 explicit st_dst_reg(st_src_reg reg); 169 170 gl_register_file file; /**< PROGRAM_* from Mesa */ 171 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 172 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 173 GLuint cond_mask:4; 174 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 175 /** Register index should be offset by the integer in this reg. */ 176 st_src_reg *reladdr; 177}; 178 179st_src_reg::st_src_reg(st_dst_reg reg) 180{ 181 this->type = reg.type; 182 this->file = reg.file; 183 this->index = reg.index; 184 this->swizzle = SWIZZLE_XYZW; 185 this->negate = 0; 186 this->reladdr = reg.reladdr; 187} 188 189st_dst_reg::st_dst_reg(st_src_reg reg) 190{ 191 this->type = reg.type; 192 this->file = reg.file; 193 this->index = reg.index; 194 this->writemask = WRITEMASK_XYZW; 195 this->cond_mask = COND_TR; 196 this->reladdr = reg.reladdr; 197} 198 199class glsl_to_tgsi_instruction : public exec_node { 200public: 201 /* Callers of this ralloc-based new need not call delete. It's 202 * easier to just ralloc_free 'ctx' (or any of its ancestors). 
*/ 203 static void* operator new(size_t size, void *ctx) 204 { 205 void *node; 206 207 node = rzalloc_size(ctx, size); 208 assert(node != NULL); 209 210 return node; 211 } 212 213 unsigned op; 214 st_dst_reg dst; 215 st_src_reg src[3]; 216 /** Pointer to the ir source this tree came from for debugging */ 217 ir_instruction *ir; 218 GLboolean cond_update; 219 bool saturate; 220 int sampler; /**< sampler index */ 221 int tex_target; /**< One of TEXTURE_*_INDEX */ 222 GLboolean tex_shadow; 223 struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; 224 unsigned tex_offset_num_offset; 225 int dead_mask; /**< Used in dead code elimination */ 226 227 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ 228}; 229 230class variable_storage : public exec_node { 231public: 232 variable_storage(ir_variable *var, gl_register_file file, int index) 233 : file(file), index(index), var(var) 234 { 235 /* empty */ 236 } 237 238 gl_register_file file; 239 int index; 240 ir_variable *var; /* variable that maps to this, if any */ 241}; 242 243class immediate_storage : public exec_node { 244public: 245 immediate_storage(gl_constant_value *values, int size, int type) 246 { 247 memcpy(this->values, values, size * sizeof(gl_constant_value)); 248 this->size = size; 249 this->type = type; 250 } 251 252 gl_constant_value values[4]; 253 int size; /**< Number of components (1-4) */ 254 int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ 255}; 256 257class function_entry : public exec_node { 258public: 259 ir_function_signature *sig; 260 261 /** 262 * identifier of this function signature used by the program. 263 * 264 * At the point that TGSI instructions for function calls are 265 * generated, we don't know the address of the first instruction of 266 * the function body. So we make the BranchTarget that is called a 267 * small integer and rewrite them during set_branchtargets(). 
268 */ 269 int sig_id; 270 271 /** 272 * Pointer to first instruction of the function body. 273 * 274 * Set during function body emits after main() is processed. 275 */ 276 glsl_to_tgsi_instruction *bgn_inst; 277 278 /** 279 * Index of the first instruction of the function body in actual TGSI. 280 * 281 * Set after conversion from glsl_to_tgsi_instruction to TGSI. 282 */ 283 int inst; 284 285 /** Storage for the return value. */ 286 st_src_reg return_reg; 287}; 288 289class glsl_to_tgsi_visitor : public ir_visitor { 290public: 291 glsl_to_tgsi_visitor(); 292 ~glsl_to_tgsi_visitor(); 293 294 function_entry *current_function; 295 296 struct gl_context *ctx; 297 struct gl_program *prog; 298 struct gl_shader_program *shader_program; 299 struct gl_shader_compiler_options *options; 300 301 int next_temp; 302 303 int num_address_regs; 304 int samplers_used; 305 bool indirect_addr_temps; 306 bool indirect_addr_consts; 307 int num_clip_distances; 308 309 int glsl_version; 310 bool native_integers; 311 312 variable_storage *find_variable_storage(ir_variable *var); 313 314 int add_constant(gl_register_file file, gl_constant_value values[4], 315 int size, int datatype, GLuint *swizzle_out); 316 317 function_entry *get_function_signature(ir_function_signature *sig); 318 319 st_src_reg get_temp(const glsl_type *type); 320 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); 321 322 st_src_reg st_src_reg_for_float(float val); 323 st_src_reg st_src_reg_for_int(int val); 324 st_src_reg st_src_reg_for_type(int type, int val); 325 326 /** 327 * \name Visit methods 328 * 329 * As typical for the visitor pattern, there must be one \c visit method for 330 * each concrete subclass of \c ir_instruction. Virtual base classes within 331 * the hierarchy should not have \c visit methods. 
332 */ 333 /*@{*/ 334 virtual void visit(ir_variable *); 335 virtual void visit(ir_loop *); 336 virtual void visit(ir_loop_jump *); 337 virtual void visit(ir_function_signature *); 338 virtual void visit(ir_function *); 339 virtual void visit(ir_expression *); 340 virtual void visit(ir_swizzle *); 341 virtual void visit(ir_dereference_variable *); 342 virtual void visit(ir_dereference_array *); 343 virtual void visit(ir_dereference_record *); 344 virtual void visit(ir_assignment *); 345 virtual void visit(ir_constant *); 346 virtual void visit(ir_call *); 347 virtual void visit(ir_return *); 348 virtual void visit(ir_discard *); 349 virtual void visit(ir_texture *); 350 virtual void visit(ir_if *); 351 /*@}*/ 352 353 st_src_reg result; 354 355 /** List of variable_storage */ 356 exec_list variables; 357 358 /** List of immediate_storage */ 359 exec_list immediates; 360 int num_immediates; 361 362 /** List of function_entry */ 363 exec_list function_signatures; 364 int next_signature_id; 365 366 /** List of glsl_to_tgsi_instruction */ 367 exec_list instructions; 368 369 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); 370 371 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 372 st_dst_reg dst, st_src_reg src0); 373 374 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 375 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 376 377 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 378 st_dst_reg dst, 379 st_src_reg src0, st_src_reg src1, st_src_reg src2); 380 381 unsigned get_opcode(ir_instruction *ir, unsigned op, 382 st_dst_reg dst, 383 st_src_reg src0, st_src_reg src1); 384 385 /** 386 * Emit the correct dot-product instruction for the type of arguments 387 */ 388 glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, 389 st_dst_reg dst, 390 st_src_reg src0, 391 st_src_reg src1, 392 unsigned elements); 393 394 void emit_scalar(ir_instruction *ir, unsigned op, 395 st_dst_reg dst, st_src_reg src0); 396 397 
void emit_scalar(ir_instruction *ir, unsigned op, 398 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 399 400 void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst); 401 402 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); 403 404 void emit_scs(ir_instruction *ir, unsigned op, 405 st_dst_reg dst, const st_src_reg &src); 406 407 bool try_emit_mad(ir_expression *ir, 408 int mul_operand); 409 bool try_emit_mad_for_and_not(ir_expression *ir, 410 int mul_operand); 411 bool try_emit_sat(ir_expression *ir); 412 413 void emit_swz(ir_expression *ir); 414 415 bool process_move_condition(ir_rvalue *ir); 416 417 void remove_output_reads(gl_register_file type); 418 void simplify_cmp(void); 419 420 void rename_temp_register(int index, int new_index); 421 int get_first_temp_read(int index); 422 int get_first_temp_write(int index); 423 int get_last_temp_read(int index); 424 int get_last_temp_write(int index); 425 426 void copy_propagate(void); 427 void eliminate_dead_code(void); 428 int eliminate_dead_code_advanced(void); 429 void merge_registers(void); 430 void renumber_registers(void); 431 432 void *mem_ctx; 433}; 434 435static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); 436 437static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); 438 439static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); 440 441static void 442fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); 443 444static void 445fail_link(struct gl_shader_program *prog, const char *fmt, ...) 
446{ 447 va_list args; 448 va_start(args, fmt); 449 ralloc_vasprintf_append(&prog->InfoLog, fmt, args); 450 va_end(args); 451 452 prog->LinkStatus = GL_FALSE; 453} 454 455static int 456swizzle_for_size(int size) 457{ 458 int size_swizzles[4] = { 459 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 460 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 461 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 462 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 463 }; 464 465 assert((size >= 1) && (size <= 4)); 466 return size_swizzles[size - 1]; 467} 468 469static bool 470is_tex_instruction(unsigned opcode) 471{ 472 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 473 return info->is_tex; 474} 475 476static unsigned 477num_inst_dst_regs(unsigned opcode) 478{ 479 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 480 return info->num_dst; 481} 482 483static unsigned 484num_inst_src_regs(unsigned opcode) 485{ 486 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 487 return info->is_tex ? info->num_src - 1 : info->num_src; 488} 489 490glsl_to_tgsi_instruction * 491glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 492 st_dst_reg dst, 493 st_src_reg src0, st_src_reg src1, st_src_reg src2) 494{ 495 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); 496 int num_reladdr = 0, i; 497 498 op = get_opcode(ir, op, dst, src0, src1); 499 500 /* If we have to do relative addressing, we want to load the ARL 501 * reg directly for one of the regs, and preload the other reladdr 502 * sources into temps. 
503 */ 504 num_reladdr += dst.reladdr != NULL; 505 num_reladdr += src0.reladdr != NULL; 506 num_reladdr += src1.reladdr != NULL; 507 num_reladdr += src2.reladdr != NULL; 508 509 reladdr_to_temp(ir, &src2, &num_reladdr); 510 reladdr_to_temp(ir, &src1, &num_reladdr); 511 reladdr_to_temp(ir, &src0, &num_reladdr); 512 513 if (dst.reladdr) { 514 emit_arl(ir, address_reg, *dst.reladdr); 515 num_reladdr--; 516 } 517 assert(num_reladdr == 0); 518 519 inst->op = op; 520 inst->dst = dst; 521 inst->src[0] = src0; 522 inst->src[1] = src1; 523 inst->src[2] = src2; 524 inst->ir = ir; 525 inst->dead_mask = 0; 526 527 inst->function = NULL; 528 529 if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL) 530 this->num_address_regs = 1; 531 532 /* Update indirect addressing status used by TGSI */ 533 if (dst.reladdr) { 534 switch(dst.file) { 535 case PROGRAM_TEMPORARY: 536 this->indirect_addr_temps = true; 537 break; 538 case PROGRAM_LOCAL_PARAM: 539 case PROGRAM_ENV_PARAM: 540 case PROGRAM_STATE_VAR: 541 case PROGRAM_NAMED_PARAM: 542 case PROGRAM_CONSTANT: 543 case PROGRAM_UNIFORM: 544 this->indirect_addr_consts = true; 545 break; 546 case PROGRAM_IMMEDIATE: 547 assert(!"immediates should not have indirect addressing"); 548 break; 549 default: 550 break; 551 } 552 } 553 else { 554 for (i=0; i<3; i++) { 555 if(inst->src[i].reladdr) { 556 switch(inst->src[i].file) { 557 case PROGRAM_TEMPORARY: 558 this->indirect_addr_temps = true; 559 break; 560 case PROGRAM_LOCAL_PARAM: 561 case PROGRAM_ENV_PARAM: 562 case PROGRAM_STATE_VAR: 563 case PROGRAM_NAMED_PARAM: 564 case PROGRAM_CONSTANT: 565 case PROGRAM_UNIFORM: 566 this->indirect_addr_consts = true; 567 break; 568 case PROGRAM_IMMEDIATE: 569 assert(!"immediates should not have indirect addressing"); 570 break; 571 default: 572 break; 573 } 574 } 575 } 576 } 577 578 this->instructions.push_tail(inst); 579 580 if (native_integers) 581 try_emit_float_set(ir, op, dst); 582 583 return inst; 584} 585 586 587glsl_to_tgsi_instruction * 
588glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 589 st_dst_reg dst, st_src_reg src0, st_src_reg src1) 590{ 591 return emit(ir, op, dst, src0, src1, undef_src); 592} 593 594glsl_to_tgsi_instruction * 595glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 596 st_dst_reg dst, st_src_reg src0) 597{ 598 assert(dst.writemask != 0); 599 return emit(ir, op, dst, src0, undef_src, undef_src); 600} 601 602glsl_to_tgsi_instruction * 603glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) 604{ 605 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 606} 607 608 /** 609 * Emits the code to convert the result of float SET instructions to integers. 610 */ 611void 612glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op, 613 st_dst_reg dst) 614{ 615 if ((op == TGSI_OPCODE_SEQ || 616 op == TGSI_OPCODE_SNE || 617 op == TGSI_OPCODE_SGE || 618 op == TGSI_OPCODE_SLT)) 619 { 620 st_src_reg src = st_src_reg(dst); 621 src.negate = ~src.negate; 622 dst.type = GLSL_TYPE_FLOAT; 623 emit(ir, TGSI_OPCODE_F2I, dst, src); 624 } 625} 626 627/** 628 * Determines whether to use an integer, unsigned integer, or float opcode 629 * based on the operands and input opcode, then emits the result. 630 */ 631unsigned 632glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, 633 st_dst_reg dst, 634 st_src_reg src0, st_src_reg src1) 635{ 636 int type = GLSL_TYPE_FLOAT; 637 638 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) 639 type = GLSL_TYPE_FLOAT; 640 else if (native_integers) 641 type = src0.type == GLSL_TYPE_BOOL ? 
GLSL_TYPE_INT : src0.type; 642 643#define case4(c, f, i, u) \ 644 case TGSI_OPCODE_##c: \ 645 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ 646 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ 647 else op = TGSI_OPCODE_##f; \ 648 break; 649#define case3(f, i, u) case4(f, f, i, u) 650#define case2fi(f, i) case4(f, f, i, i) 651#define case2iu(i, u) case4(i, LAST, i, u) 652 653 switch(op) { 654 case2fi(ADD, UADD); 655 case2fi(MUL, UMUL); 656 case2fi(MAD, UMAD); 657 case3(DIV, IDIV, UDIV); 658 case3(MAX, IMAX, UMAX); 659 case3(MIN, IMIN, UMIN); 660 case2iu(MOD, UMOD); 661 662 case2fi(SEQ, USEQ); 663 case2fi(SNE, USNE); 664 case3(SGE, ISGE, USGE); 665 case3(SLT, ISLT, USLT); 666 667 case2iu(ISHR, USHR); 668 669 default: break; 670 } 671 672 assert(op != TGSI_OPCODE_LAST); 673 return op; 674} 675 676glsl_to_tgsi_instruction * 677glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, 678 st_dst_reg dst, st_src_reg src0, st_src_reg src1, 679 unsigned elements) 680{ 681 static const unsigned dot_opcodes[] = { 682 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 683 }; 684 685 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 686} 687 688/** 689 * Emits TGSI scalar opcodes to produce unique answers across channels. 690 * 691 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X 692 * channel determines the result across all channels. So to do a vec4 693 * of this operation, we want to emit a scalar per source channel used 694 * to produce dest channels. 695 */ 696void 697glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 698 st_dst_reg dst, 699 st_src_reg orig_src0, st_src_reg orig_src1) 700{ 701 int i, j; 702 int done_mask = ~dst.writemask; 703 704 /* TGSI RCP is a scalar operation splatting results to all channels, 705 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 706 * dst channels. 
707 */ 708 for (i = 0; i < 4; i++) { 709 GLuint this_mask = (1 << i); 710 glsl_to_tgsi_instruction *inst; 711 st_src_reg src0 = orig_src0; 712 st_src_reg src1 = orig_src1; 713 714 if (done_mask & this_mask) 715 continue; 716 717 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 718 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 719 for (j = i + 1; j < 4; j++) { 720 /* If there is another enabled component in the destination that is 721 * derived from the same inputs, generate its value on this pass as 722 * well. 723 */ 724 if (!(done_mask & (1 << j)) && 725 GET_SWZ(src0.swizzle, j) == src0_swiz && 726 GET_SWZ(src1.swizzle, j) == src1_swiz) { 727 this_mask |= (1 << j); 728 } 729 } 730 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 731 src0_swiz, src0_swiz); 732 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 733 src1_swiz, src1_swiz); 734 735 inst = emit(ir, op, dst, src0, src1); 736 inst->dst.writemask = this_mask; 737 done_mask |= this_mask; 738 } 739} 740 741void 742glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 743 st_dst_reg dst, st_src_reg src0) 744{ 745 st_src_reg undef = undef_src; 746 747 undef.swizzle = SWIZZLE_XXXX; 748 749 emit_scalar(ir, op, dst, src0, undef); 750} 751 752void 753glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 754 st_dst_reg dst, st_src_reg src0) 755{ 756 int op = TGSI_OPCODE_ARL; 757 758 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) 759 op = TGSI_OPCODE_UARL; 760 761 emit(NULL, op, dst, src0); 762} 763 764/** 765 * Emit an TGSI_OPCODE_SCS instruction 766 * 767 * The \c SCS opcode functions a bit differently than the other TGSI opcodes. 768 * Instead of splatting its result across all four components of the 769 * destination, it writes one value to the \c x component and another value to 770 * the \c y component. 771 * 772 * \param ir IR instruction being processed 773 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 774 * on which value is desired. 
775 * \param dst Destination register 776 * \param src Source register 777 */ 778void 779glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, 780 st_dst_reg dst, 781 const st_src_reg &src) 782{ 783 /* Vertex programs cannot use the SCS opcode. 784 */ 785 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { 786 emit_scalar(ir, op, dst, src); 787 return; 788 } 789 790 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; 791 const unsigned scs_mask = (1U << component); 792 int done_mask = ~dst.writemask; 793 st_src_reg tmp; 794 795 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); 796 797 /* If there are compnents in the destination that differ from the component 798 * that will be written by the SCS instrution, we'll need a temporary. 799 */ 800 if (scs_mask != unsigned(dst.writemask)) { 801 tmp = get_temp(glsl_type::vec4_type); 802 } 803 804 for (unsigned i = 0; i < 4; i++) { 805 unsigned this_mask = (1U << i); 806 st_src_reg src0 = src; 807 808 if ((done_mask & this_mask) != 0) 809 continue; 810 811 /* The source swizzle specified which component of the source generates 812 * sine / cosine for the current component in the destination. The SCS 813 * instruction requires that this value be swizzle to the X component. 814 * Replace the current swizzle with a swizzle that puts the source in 815 * the X component. 816 */ 817 unsigned src0_swiz = GET_SWZ(src.swizzle, i); 818 819 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 820 src0_swiz, src0_swiz); 821 for (unsigned j = i + 1; j < 4; j++) { 822 /* If there is another enabled component in the destination that is 823 * derived from the same inputs, generate its value on this pass as 824 * well. 825 */ 826 if (!(done_mask & (1 << j)) && 827 GET_SWZ(src0.swizzle, j) == src0_swiz) { 828 this_mask |= (1 << j); 829 } 830 } 831 832 if (this_mask != scs_mask) { 833 glsl_to_tgsi_instruction *inst; 834 st_dst_reg tmp_dst = st_dst_reg(tmp); 835 836 /* Emit the SCS instruction. 
837 */ 838 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); 839 inst->dst.writemask = scs_mask; 840 841 /* Move the result of the SCS instruction to the desired location in 842 * the destination. 843 */ 844 tmp.swizzle = MAKE_SWIZZLE4(component, component, 845 component, component); 846 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); 847 inst->dst.writemask = this_mask; 848 } else { 849 /* Emit the SCS instruction to write directly to the destination. 850 */ 851 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); 852 inst->dst.writemask = scs_mask; 853 } 854 855 done_mask |= this_mask; 856 } 857} 858 859int 860glsl_to_tgsi_visitor::add_constant(gl_register_file file, 861 gl_constant_value values[4], int size, int datatype, 862 GLuint *swizzle_out) 863{ 864 if (file == PROGRAM_CONSTANT) { 865 return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, 866 size, datatype, swizzle_out); 867 } else { 868 int index = 0; 869 immediate_storage *entry; 870 assert(file == PROGRAM_IMMEDIATE); 871 872 /* Search immediate storage to see if we already have an identical 873 * immediate that we can use instead of adding a duplicate entry. 874 */ 875 foreach_iter(exec_list_iterator, iter, this->immediates) { 876 entry = (immediate_storage *)iter.get(); 877 878 if (entry->size == size && 879 entry->type == datatype && 880 !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { 881 return index; 882 } 883 index++; 884 } 885 886 /* Add this immediate to the list. 
*/ 887 entry = new(mem_ctx) immediate_storage(values, size, datatype); 888 this->immediates.push_tail(entry); 889 this->num_immediates++; 890 return index; 891 } 892} 893 894st_src_reg 895glsl_to_tgsi_visitor::st_src_reg_for_float(float val) 896{ 897 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); 898 union gl_constant_value uval; 899 900 uval.f = val; 901 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); 902 903 return src; 904} 905 906st_src_reg 907glsl_to_tgsi_visitor::st_src_reg_for_int(int val) 908{ 909 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); 910 union gl_constant_value uval; 911 912 assert(native_integers); 913 914 uval.i = val; 915 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); 916 917 return src; 918} 919 920st_src_reg 921glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) 922{ 923 if (native_integers) 924 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 925 st_src_reg_for_int(val); 926 else 927 return st_src_reg_for_float(val); 928} 929 930static int 931type_size(const struct glsl_type *type) 932{ 933 unsigned int i; 934 int size; 935 936 switch (type->base_type) { 937 case GLSL_TYPE_UINT: 938 case GLSL_TYPE_INT: 939 case GLSL_TYPE_FLOAT: 940 case GLSL_TYPE_BOOL: 941 if (type->is_matrix()) { 942 return type->matrix_columns; 943 } else { 944 /* Regardless of size of vector, it gets a vec4. This is bad 945 * packing for things like floats, but otherwise arrays become a 946 * mess. Hopefully a later pass over the code can pack scalars 947 * down if appropriate. 948 */ 949 return 1; 950 } 951 case GLSL_TYPE_ARRAY: 952 assert(type->length > 0); 953 return type_size(type->fields.array) * type->length; 954 case GLSL_TYPE_STRUCT: 955 size = 0; 956 for (i = 0; i < type->length; i++) { 957 size += type_size(type->fields.structure[i].type); 958 } 959 return size; 960 case GLSL_TYPE_SAMPLER: 961 /* Samplers take up one slot in UNIFORMS[], but they're baked in 962 * at link time. 
963 */ 964 return 1; 965 default: 966 assert(0); 967 return 0; 968 } 969} 970 971/** 972 * In the initial pass of codegen, we assign temporary numbers to 973 * intermediate results. (not SSA -- variable assignments will reuse 974 * storage). 975 */ 976st_src_reg 977glsl_to_tgsi_visitor::get_temp(const glsl_type *type) 978{ 979 st_src_reg src; 980 981 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; 982 src.file = PROGRAM_TEMPORARY; 983 src.index = next_temp; 984 src.reladdr = NULL; 985 next_temp += type_size(type); 986 987 if (type->is_array() || type->is_record()) { 988 src.swizzle = SWIZZLE_NOOP; 989 } else { 990 src.swizzle = swizzle_for_size(type->vector_elements); 991 } 992 src.negate = 0; 993 994 return src; 995} 996 997variable_storage * 998glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 999{ 1000 1001 variable_storage *entry; 1002 1003 foreach_iter(exec_list_iterator, iter, this->variables) { 1004 entry = (variable_storage *)iter.get(); 1005 1006 if (entry->var == var) 1007 return entry; 1008 } 1009 1010 return NULL; 1011} 1012 1013void 1014glsl_to_tgsi_visitor::visit(ir_variable *ir) 1015{ 1016 if (strcmp(ir->name, "gl_FragCoord") == 0) { 1017 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 1018 1019 fp->OriginUpperLeft = ir->origin_upper_left; 1020 fp->PixelCenterInteger = ir->pixel_center_integer; 1021 } 1022 1023 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 1024 unsigned int i; 1025 const ir_state_slot *const slots = ir->state_slots; 1026 assert(ir->state_slots != NULL); 1027 1028 /* Check if this statevar's setup in the STATE file exactly 1029 * matches how we'll want to reference it as a 1030 * struct/array/whatever. If not, then we need to move it into 1031 * temporary storage and hope that it'll get copy-propagated 1032 * out. 
1033 */ 1034 for (i = 0; i < ir->num_state_slots; i++) { 1035 if (slots[i].swizzle != SWIZZLE_XYZW) { 1036 break; 1037 } 1038 } 1039 1040 variable_storage *storage; 1041 st_dst_reg dst; 1042 if (i == ir->num_state_slots) { 1043 /* We'll set the index later. */ 1044 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 1045 this->variables.push_tail(storage); 1046 1047 dst = undef_dst; 1048 } else { 1049 /* The variable_storage constructor allocates slots based on the size 1050 * of the type. However, this had better match the number of state 1051 * elements that we're going to copy into the new temporary. 1052 */ 1053 assert((int) ir->num_state_slots == type_size(ir->type)); 1054 1055 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 1056 this->next_temp); 1057 this->variables.push_tail(storage); 1058 this->next_temp += type_size(ir->type); 1059 1060 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, 1061 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); 1062 } 1063 1064 1065 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 1066 int index = _mesa_add_state_reference(this->prog->Parameters, 1067 (gl_state_index *)slots[i].tokens); 1068 1069 if (storage->file == PROGRAM_STATE_VAR) { 1070 if (storage->index == -1) { 1071 storage->index = index; 1072 } else { 1073 assert(index == storage->index + (int)i); 1074 } 1075 } else { 1076 st_src_reg src(PROGRAM_STATE_VAR, index, 1077 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); 1078 src.swizzle = slots[i].swizzle; 1079 emit(ir, TGSI_OPCODE_MOV, dst, src); 1080 /* even a float takes up a whole vec4 reg in a struct/array. 
*/ 1081 dst.index++; 1082 } 1083 } 1084 1085 if (storage->file == PROGRAM_TEMPORARY && 1086 dst.index != storage->index + (int) ir->num_state_slots) { 1087 fail_link(this->shader_program, 1088 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1089 ir->name, dst.index - storage->index, 1090 type_size(ir->type)); 1091 } 1092 } 1093} 1094 1095void 1096glsl_to_tgsi_visitor::visit(ir_loop *ir) 1097{ 1098 ir_dereference_variable *counter = NULL; 1099 1100 if (ir->counter != NULL) 1101 counter = new(ir) ir_dereference_variable(ir->counter); 1102 1103 if (ir->from != NULL) { 1104 assert(ir->counter != NULL); 1105 1106 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 1107 1108 a->accept(this); 1109 delete a; 1110 } 1111 1112 emit(NULL, TGSI_OPCODE_BGNLOOP); 1113 1114 if (ir->to) { 1115 ir_expression *e = 1116 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 1117 counter, ir->to); 1118 ir_if *if_stmt = new(ir) ir_if(e); 1119 1120 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 1121 1122 if_stmt->then_instructions.push_tail(brk); 1123 1124 if_stmt->accept(this); 1125 1126 delete if_stmt; 1127 delete e; 1128 delete brk; 1129 } 1130 1131 visit_exec_list(&ir->body_instructions, this); 1132 1133 if (ir->increment) { 1134 ir_expression *e = 1135 new(ir) ir_expression(ir_binop_add, counter->type, 1136 counter, ir->increment); 1137 1138 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 1139 1140 a->accept(this); 1141 delete a; 1142 delete e; 1143 } 1144 1145 emit(NULL, TGSI_OPCODE_ENDLOOP); 1146} 1147 1148void 1149glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 1150{ 1151 switch (ir->mode) { 1152 case ir_loop_jump::jump_break: 1153 emit(NULL, TGSI_OPCODE_BRK); 1154 break; 1155 case ir_loop_jump::jump_continue: 1156 emit(NULL, TGSI_OPCODE_CONT); 1157 break; 1158 } 1159} 1160 1161 1162void 1163glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1164{ 1165 assert(0); 1166 (void)ir; 1167} 1168 1169void 
1170glsl_to_tgsi_visitor::visit(ir_function *ir) 1171{ 1172 /* Ignore function bodies other than main() -- we shouldn't see calls to 1173 * them since they should all be inlined before we get to glsl_to_tgsi. 1174 */ 1175 if (strcmp(ir->name, "main") == 0) { 1176 const ir_function_signature *sig; 1177 exec_list empty; 1178 1179 sig = ir->matching_signature(&empty); 1180 1181 assert(sig); 1182 1183 foreach_iter(exec_list_iterator, iter, sig->body) { 1184 ir_instruction *ir = (ir_instruction *)iter.get(); 1185 1186 ir->accept(this); 1187 } 1188 } 1189} 1190 1191bool 1192glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1193{ 1194 int nonmul_operand = 1 - mul_operand; 1195 st_src_reg a, b, c; 1196 st_dst_reg result_dst; 1197 1198 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1199 if (!expr || expr->operation != ir_binop_mul) 1200 return false; 1201 1202 expr->operands[0]->accept(this); 1203 a = this->result; 1204 expr->operands[1]->accept(this); 1205 b = this->result; 1206 ir->operands[nonmul_operand]->accept(this); 1207 c = this->result; 1208 1209 this->result = get_temp(ir->type); 1210 result_dst = st_dst_reg(this->result); 1211 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1212 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1213 1214 return true; 1215} 1216 1217/** 1218 * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) 1219 * 1220 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 1221 * implemented using multiplication, and logical-or is implemented using 1222 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 1223 * As result, the logical expression (a & !b) can be rewritten as: 1224 * 1225 * - a * !b 1226 * - a * (1 - b) 1227 * - (a * 1) - (a * b) 1228 * - a + -(a * b) 1229 * - a + (a * -b) 1230 * 1231 * This final expression can be implemented as a single MAD(a, -b, a) 1232 * instruction. 
1233 */ 1234bool 1235glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 1236{ 1237 const int other_operand = 1 - try_operand; 1238 st_src_reg a, b; 1239 1240 ir_expression *expr = ir->operands[try_operand]->as_expression(); 1241 if (!expr || expr->operation != ir_unop_logic_not) 1242 return false; 1243 1244 ir->operands[other_operand]->accept(this); 1245 a = this->result; 1246 expr->operands[0]->accept(this); 1247 b = this->result; 1248 1249 b.negate = ~b.negate; 1250 1251 this->result = get_temp(ir->type); 1252 emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); 1253 1254 return true; 1255} 1256 1257bool 1258glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) 1259{ 1260 /* Saturates were only introduced to vertex programs in 1261 * NV_vertex_program3, so don't give them to drivers in the VP. 1262 */ 1263 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) 1264 return false; 1265 1266 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 1267 if (!sat_src) 1268 return false; 1269 1270 sat_src->accept(this); 1271 st_src_reg src = this->result; 1272 1273 /* If we generated an expression instruction into a temporary in 1274 * processing the saturate's operand, apply the saturate to that 1275 * instruction. Otherwise, generate a MOV to do the saturate. 1276 * 1277 * Note that we have to be careful to only do this optimization if 1278 * the instruction in question was what generated src->result. For 1279 * example, ir_dereference_array might generate a MUL instruction 1280 * to create the reladdr, and return us a src reg using that 1281 * reladdr. That MUL result is not the value we're trying to 1282 * saturate. 
1283 */ 1284 ir_expression *sat_src_expr = sat_src->as_expression(); 1285 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || 1286 sat_src_expr->operation == ir_binop_add || 1287 sat_src_expr->operation == ir_binop_dot)) { 1288 glsl_to_tgsi_instruction *new_inst; 1289 new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 1290 new_inst->saturate = true; 1291 } else { 1292 this->result = get_temp(ir->type); 1293 st_dst_reg result_dst = st_dst_reg(this->result); 1294 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1295 glsl_to_tgsi_instruction *inst; 1296 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); 1297 inst->saturate = true; 1298 } 1299 1300 return true; 1301} 1302 1303void 1304glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1305 st_src_reg *reg, int *num_reladdr) 1306{ 1307 if (!reg->reladdr) 1308 return; 1309 1310 emit_arl(ir, address_reg, *reg->reladdr); 1311 1312 if (*num_reladdr != 1) { 1313 st_src_reg temp = get_temp(glsl_type::vec4_type); 1314 1315 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); 1316 *reg = temp; 1317 } 1318 1319 (*num_reladdr)--; 1320} 1321 1322void 1323glsl_to_tgsi_visitor::visit(ir_expression *ir) 1324{ 1325 unsigned int operand; 1326 st_src_reg op[Elements(ir->operands)]; 1327 st_src_reg result_src; 1328 st_dst_reg result_dst; 1329 1330 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) 1331 */ 1332 if (ir->operation == ir_binop_add) { 1333 if (try_emit_mad(ir, 1)) 1334 return; 1335 if (try_emit_mad(ir, 0)) 1336 return; 1337 } 1338 1339 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 1340 */ 1341 if (ir->operation == ir_binop_logic_and) { 1342 if (try_emit_mad_for_and_not(ir, 1)) 1343 return; 1344 if (try_emit_mad_for_and_not(ir, 0)) 1345 return; 1346 } 1347 1348 if (try_emit_sat(ir)) 1349 return; 1350 1351 if (ir->operation == ir_quadop_vector) 1352 assert(!"ir_quadop_vector should have been lowered"); 1353 1354 for (operand = 0; operand < 
ir->get_num_operands(); operand++) { 1355 this->result.file = PROGRAM_UNDEFINED; 1356 ir->operands[operand]->accept(this); 1357 if (this->result.file == PROGRAM_UNDEFINED) { 1358 ir_print_visitor v; 1359 printf("Failed to get tree for expression operand:\n"); 1360 ir->operands[operand]->accept(&v); 1361 exit(1); 1362 } 1363 op[operand] = this->result; 1364 1365 /* Matrix expression operands should have been broken down to vector 1366 * operations already. 1367 */ 1368 assert(!ir->operands[operand]->type->is_matrix()); 1369 } 1370 1371 int vector_elements = ir->operands[0]->type->vector_elements; 1372 if (ir->operands[1]) { 1373 vector_elements = MAX2(vector_elements, 1374 ir->operands[1]->type->vector_elements); 1375 } 1376 1377 this->result.file = PROGRAM_UNDEFINED; 1378 1379 /* Storage for our result. Ideally for an assignment we'd be using 1380 * the actual storage for the result here, instead. 1381 */ 1382 result_src = get_temp(ir->type); 1383 /* convenience for the emit functions below. */ 1384 result_dst = st_dst_reg(result_src); 1385 /* Limit writes to the channels that will be used by result_src later. 1386 * This does limit this temp's use as a temporary for multi-instruction 1387 * sequences. 1388 */ 1389 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1390 1391 switch (ir->operation) { 1392 case ir_unop_logic_not: 1393 if (result_dst.type != GLSL_TYPE_FLOAT) 1394 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1395 else { 1396 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 1397 * older GPUs implement SEQ using multiple instructions (i915 uses two 1398 * SGE instructions and a MUL instruction). Since our logic values are 1399 * 0.0 and 1.0, 1-x also implements !x. 
1400 */ 1401 op[0].negate = ~op[0].negate; 1402 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); 1403 } 1404 break; 1405 case ir_unop_neg: 1406 assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); 1407 if (result_dst.type == GLSL_TYPE_INT) 1408 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1409 else { 1410 op[0].negate = ~op[0].negate; 1411 result_src = op[0]; 1412 } 1413 break; 1414 case ir_unop_abs: 1415 assert(result_dst.type == GLSL_TYPE_FLOAT); 1416 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); 1417 break; 1418 case ir_unop_sign: 1419 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); 1420 break; 1421 case ir_unop_rcp: 1422 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); 1423 break; 1424 1425 case ir_unop_exp2: 1426 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); 1427 break; 1428 case ir_unop_exp: 1429 case ir_unop_log: 1430 assert(!"not reached: should be handled by ir_explog_to_explog2"); 1431 break; 1432 case ir_unop_log2: 1433 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); 1434 break; 1435 case ir_unop_sin: 1436 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1437 break; 1438 case ir_unop_cos: 1439 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1440 break; 1441 case ir_unop_sin_reduced: 1442 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1443 break; 1444 case ir_unop_cos_reduced: 1445 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1446 break; 1447 1448 case ir_unop_dFdx: 1449 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); 1450 break; 1451 case ir_unop_dFdy: 1452 op[0].negate = ~op[0].negate; 1453 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); 1454 break; 1455 1456 case ir_unop_noise: { 1457 /* At some point, a motivated person could add a better 1458 * implementation of noise. Currently not even the nvidia 1459 * binary drivers do anything more than this. In any case, the 1460 * place to do this is in the GL state tracker, not the poor 1461 * driver. 
1462 */ 1463 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); 1464 break; 1465 } 1466 1467 case ir_binop_add: 1468 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1469 break; 1470 case ir_binop_sub: 1471 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); 1472 break; 1473 1474 case ir_binop_mul: 1475 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1476 break; 1477 case ir_binop_div: 1478 if (result_dst.type == GLSL_TYPE_FLOAT) 1479 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1480 else 1481 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); 1482 break; 1483 case ir_binop_mod: 1484 if (result_dst.type == GLSL_TYPE_FLOAT) 1485 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1486 else 1487 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); 1488 break; 1489 1490 case ir_binop_less: 1491 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); 1492 break; 1493 case ir_binop_greater: 1494 emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); 1495 break; 1496 case ir_binop_lequal: 1497 emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); 1498 break; 1499 case ir_binop_gequal: 1500 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); 1501 break; 1502 case ir_binop_equal: 1503 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1504 break; 1505 case ir_binop_nequal: 1506 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1507 break; 1508 case ir_binop_all_equal: 1509 /* "==" operator producing a scalar boolean. */ 1510 if (ir->operands[0]->type->is_vector() || 1511 ir->operands[1]->type->is_vector()) { 1512 st_src_reg temp = get_temp(native_integers ? 
1513 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1514 glsl_type::vec4_type); 1515 1516 if (native_integers) { 1517 st_dst_reg temp_dst = st_dst_reg(temp); 1518 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1519 1520 emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); 1521 1522 /* Emit 1-3 AND operations to combine the SEQ results. */ 1523 switch (ir->operands[0]->type->vector_elements) { 1524 case 2: 1525 break; 1526 case 3: 1527 temp_dst.writemask = WRITEMASK_Y; 1528 temp1.swizzle = SWIZZLE_YYYY; 1529 temp2.swizzle = SWIZZLE_ZZZZ; 1530 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1531 break; 1532 case 4: 1533 temp_dst.writemask = WRITEMASK_X; 1534 temp1.swizzle = SWIZZLE_XXXX; 1535 temp2.swizzle = SWIZZLE_YYYY; 1536 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1537 temp_dst.writemask = WRITEMASK_Y; 1538 temp1.swizzle = SWIZZLE_ZZZZ; 1539 temp2.swizzle = SWIZZLE_WWWW; 1540 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1541 } 1542 1543 temp1.swizzle = SWIZZLE_XXXX; 1544 temp2.swizzle = SWIZZLE_YYYY; 1545 emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); 1546 } else { 1547 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1548 1549 /* After the dot-product, the value will be an integer on the 1550 * range [0,4]. Zero becomes 1.0, and positive values become zero. 1551 */ 1552 emit_dp(ir, result_dst, temp, temp, vector_elements); 1553 1554 /* Negating the result of the dot-product gives values on the range 1555 * [-4, 0]. Zero becomes 1.0, and negative values become zero. 1556 * This is achieved using SGE. 1557 */ 1558 st_src_reg sge_src = result_src; 1559 sge_src.negate = ~sge_src.negate; 1560 emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); 1561 } 1562 } else { 1563 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1564 } 1565 break; 1566 case ir_binop_any_nequal: 1567 /* "!=" operator producing a scalar boolean. 
*/ 1568 if (ir->operands[0]->type->is_vector() || 1569 ir->operands[1]->type->is_vector()) { 1570 st_src_reg temp = get_temp(native_integers ? 1571 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1572 glsl_type::vec4_type); 1573 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1574 1575 if (native_integers) { 1576 st_dst_reg temp_dst = st_dst_reg(temp); 1577 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1578 1579 /* Emit 1-3 OR operations to combine the SNE results. */ 1580 switch (ir->operands[0]->type->vector_elements) { 1581 case 2: 1582 break; 1583 case 3: 1584 temp_dst.writemask = WRITEMASK_Y; 1585 temp1.swizzle = SWIZZLE_YYYY; 1586 temp2.swizzle = SWIZZLE_ZZZZ; 1587 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1588 break; 1589 case 4: 1590 temp_dst.writemask = WRITEMASK_X; 1591 temp1.swizzle = SWIZZLE_XXXX; 1592 temp2.swizzle = SWIZZLE_YYYY; 1593 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1594 temp_dst.writemask = WRITEMASK_Y; 1595 temp1.swizzle = SWIZZLE_ZZZZ; 1596 temp2.swizzle = SWIZZLE_WWWW; 1597 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1598 } 1599 1600 temp1.swizzle = SWIZZLE_XXXX; 1601 temp2.swizzle = SWIZZLE_YYYY; 1602 emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); 1603 } else { 1604 /* After the dot-product, the value will be an integer on the 1605 * range [0,4]. Zero stays zero, and positive values become 1.0. 1606 */ 1607 glsl_to_tgsi_instruction *const dp = 1608 emit_dp(ir, result_dst, temp, temp, vector_elements); 1609 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1610 /* The clamping to [0,1] can be done for free in the fragment 1611 * shader with a saturate. 1612 */ 1613 dp->saturate = true; 1614 } else { 1615 /* Negating the result of the dot-product gives values on the range 1616 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1617 * achieved using SLT. 
1618 */ 1619 st_src_reg slt_src = result_src; 1620 slt_src.negate = ~slt_src.negate; 1621 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1622 } 1623 } 1624 } else { 1625 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1626 } 1627 break; 1628 1629 case ir_unop_any: { 1630 assert(ir->operands[0]->type->is_vector()); 1631 1632 /* After the dot-product, the value will be an integer on the 1633 * range [0,4]. Zero stays zero, and positive values become 1.0. 1634 */ 1635 glsl_to_tgsi_instruction *const dp = 1636 emit_dp(ir, result_dst, op[0], op[0], 1637 ir->operands[0]->type->vector_elements); 1638 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 1639 result_dst.type == GLSL_TYPE_FLOAT) { 1640 /* The clamping to [0,1] can be done for free in the fragment 1641 * shader with a saturate. 1642 */ 1643 dp->saturate = true; 1644 } else if (result_dst.type == GLSL_TYPE_FLOAT) { 1645 /* Negating the result of the dot-product gives values on the range 1646 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1647 * is achieved using SLT. 1648 */ 1649 st_src_reg slt_src = result_src; 1650 slt_src.negate = ~slt_src.negate; 1651 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1652 } 1653 else { 1654 /* Use SNE 0 if integers are being used as boolean values. */ 1655 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); 1656 } 1657 break; 1658 } 1659 1660 case ir_binop_logic_xor: 1661 if (native_integers) 1662 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1663 else 1664 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1665 break; 1666 1667 case ir_binop_logic_or: { 1668 if (native_integers) { 1669 /* If integers are used as booleans, we can use an actual "or" 1670 * instruction. 1671 */ 1672 assert(native_integers); 1673 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1674 } else { 1675 /* After the addition, the value will be an integer on the 1676 * range [0,2]. 
Zero stays zero, and positive values become 1.0. 1677 */ 1678 glsl_to_tgsi_instruction *add = 1679 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1680 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1681 /* The clamping to [0,1] can be done for free in the fragment 1682 * shader with a saturate if floats are being used as boolean values. 1683 */ 1684 add->saturate = true; 1685 } else { 1686 /* Negating the result of the addition gives values on the range 1687 * [-2, 0]. Zero stays zero, and negative values become 1.0. This 1688 * is achieved using SLT. 1689 */ 1690 st_src_reg slt_src = result_src; 1691 slt_src.negate = ~slt_src.negate; 1692 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1693 } 1694 } 1695 break; 1696 } 1697 1698 case ir_binop_logic_and: 1699 /* If native integers are disabled, the bool args are stored as float 0.0 1700 * or 1.0, so "mul" gives us "and". If they're enabled, just use the 1701 * actual AND opcode. 1702 */ 1703 if (native_integers) 1704 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1705 else 1706 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1707 break; 1708 1709 case ir_binop_dot: 1710 assert(ir->operands[0]->type->is_vector()); 1711 assert(ir->operands[0]->type == ir->operands[1]->type); 1712 emit_dp(ir, result_dst, op[0], op[1], 1713 ir->operands[0]->type->vector_elements); 1714 break; 1715 1716 case ir_unop_sqrt: 1717 /* sqrt(x) = x * rsq(x). */ 1718 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1719 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); 1720 /* For incoming channels <= 0, set the result to 0. 
*/ 1721 op[0].negate = ~op[0].negate; 1722 emit(ir, TGSI_OPCODE_CMP, result_dst, 1723 op[0], result_src, st_src_reg_for_float(0.0)); 1724 break; 1725 case ir_unop_rsq: 1726 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1727 break; 1728 case ir_unop_i2f: 1729 if (native_integers) { 1730 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1731 break; 1732 } 1733 /* fallthrough to next case otherwise */ 1734 case ir_unop_b2f: 1735 if (native_integers) { 1736 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); 1737 break; 1738 } 1739 /* fallthrough to next case otherwise */ 1740 case ir_unop_i2u: 1741 case ir_unop_u2i: 1742 /* Converting between signed and unsigned integers is a no-op. */ 1743 result_src = op[0]; 1744 break; 1745 case ir_unop_b2i: 1746 if (native_integers) { 1747 /* Booleans are stored as integers using ~0 for true and 0 for false. 1748 * GLSL requires that int(bool) return 1 for true and 0 for false. 1749 * This conversion is done with AND, but it could be done with NEG. 1750 */ 1751 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); 1752 } else { 1753 /* Booleans and integers are both stored as floats when native 1754 * integers are disabled. 
1755 */ 1756 result_src = op[0]; 1757 } 1758 break; 1759 case ir_unop_f2i: 1760 if (native_integers) 1761 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1762 else 1763 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1764 break; 1765 case ir_unop_f2b: 1766 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1767 break; 1768 case ir_unop_i2b: 1769 if (native_integers) 1770 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1771 else 1772 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1773 break; 1774 case ir_unop_trunc: 1775 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1776 break; 1777 case ir_unop_ceil: 1778 op[0].negate = ~op[0].negate; 1779 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1780 result_src.negate = ~result_src.negate; 1781 break; 1782 case ir_unop_floor: 1783 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1784 break; 1785 case ir_unop_round_even: 1786 emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); 1787 break; 1788 case ir_unop_fract: 1789 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 1790 break; 1791 1792 case ir_binop_min: 1793 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 1794 break; 1795 case ir_binop_max: 1796 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 1797 break; 1798 case ir_binop_pow: 1799 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 1800 break; 1801 1802 case ir_unop_bit_not: 1803 if (native_integers) { 1804 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1805 break; 1806 } 1807 case ir_unop_u2f: 1808 if (native_integers) { 1809 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 1810 break; 1811 } 1812 case ir_binop_lshift: 1813 if (native_integers) { 1814 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); 1815 break; 1816 } 1817 case ir_binop_rshift: 1818 if (native_integers) { 1819 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); 1820 break; 1821 } 1822 case ir_binop_bit_and: 1823 if (native_integers) { 1824 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1825 
break; 1826 } 1827 case ir_binop_bit_xor: 1828 if (native_integers) { 1829 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1830 break; 1831 } 1832 case ir_binop_bit_or: 1833 if (native_integers) { 1834 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1835 break; 1836 } 1837 1838 assert(!"GLSL 1.30 features unsupported"); 1839 break; 1840 1841 case ir_quadop_vector: 1842 /* This operation should have already been handled. 1843 */ 1844 assert(!"Should not get here."); 1845 break; 1846 } 1847 1848 this->result = result_src; 1849} 1850 1851 1852void 1853glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 1854{ 1855 st_src_reg src; 1856 int i; 1857 int swizzle[4]; 1858 1859 /* Note that this is only swizzles in expressions, not those on the left 1860 * hand side of an assignment, which do write masking. See ir_assignment 1861 * for that. 1862 */ 1863 1864 ir->val->accept(this); 1865 src = this->result; 1866 assert(src.file != PROGRAM_UNDEFINED); 1867 1868 for (i = 0; i < 4; i++) { 1869 if (i < ir->type->vector_elements) { 1870 switch (i) { 1871 case 0: 1872 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1873 break; 1874 case 1: 1875 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1876 break; 1877 case 2: 1878 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1879 break; 1880 case 3: 1881 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1882 break; 1883 } 1884 } else { 1885 /* If the type is smaller than a vec4, replicate the last 1886 * channel out. 
1887 */ 1888 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1889 } 1890 } 1891 1892 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1893 1894 this->result = src; 1895} 1896 1897void 1898glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 1899{ 1900 variable_storage *entry = find_variable_storage(ir->var); 1901 ir_variable *var = ir->var; 1902 1903 if (!entry) { 1904 switch (var->mode) { 1905 case ir_var_uniform: 1906 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1907 var->location); 1908 this->variables.push_tail(entry); 1909 break; 1910 case ir_var_in: 1911 case ir_var_inout: 1912 /* The linker assigns locations for varyings and attributes, 1913 * including deprecated builtins (like gl_Color), user-assign 1914 * generic attributes (glBindVertexLocation), and 1915 * user-defined varyings. 1916 * 1917 * FINISHME: We would hit this path for function arguments. Fix! 1918 */ 1919 assert(var->location != -1); 1920 entry = new(mem_ctx) variable_storage(var, 1921 PROGRAM_INPUT, 1922 var->location); 1923 break; 1924 case ir_var_out: 1925 assert(var->location != -1); 1926 entry = new(mem_ctx) variable_storage(var, 1927 PROGRAM_OUTPUT, 1928 var->location); 1929 break; 1930 case ir_var_system_value: 1931 entry = new(mem_ctx) variable_storage(var, 1932 PROGRAM_SYSTEM_VALUE, 1933 var->location); 1934 break; 1935 case ir_var_auto: 1936 case ir_var_temporary: 1937 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1938 this->next_temp); 1939 this->variables.push_tail(entry); 1940 1941 next_temp += type_size(var->type); 1942 break; 1943 } 1944 1945 if (!entry) { 1946 printf("Failed to make storage for %s\n", var->name); 1947 exit(1); 1948 } 1949 } 1950 1951 this->result = st_src_reg(entry->file, entry->index, var->type); 1952 if (!native_integers) 1953 this->result.type = GLSL_TYPE_FLOAT; 1954} 1955 1956void 1957glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 1958{ 1959 ir_constant *index; 1960 st_src_reg src; 
1961 int element_size = type_size(ir->type); 1962 1963 index = ir->array_index->constant_expression_value(); 1964 1965 ir->array->accept(this); 1966 src = this->result; 1967 1968 if (index) { 1969 src.index += index->value.i[0] * element_size; 1970 } else { 1971 /* Variable index array dereference. It eats the "vec4" of the 1972 * base of the array and an index that offsets the TGSI register 1973 * index. 1974 */ 1975 ir->array_index->accept(this); 1976 1977 st_src_reg index_reg; 1978 1979 if (element_size == 1) { 1980 index_reg = this->result; 1981 } else { 1982 index_reg = get_temp(native_integers ? 1983 glsl_type::int_type : glsl_type::float_type); 1984 1985 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 1986 this->result, st_src_reg_for_type(index_reg.type, element_size)); 1987 } 1988 1989 /* If there was already a relative address register involved, add the 1990 * new and the old together to get the new offset. 1991 */ 1992 if (src.reladdr != NULL) { 1993 st_src_reg accum_reg = get_temp(native_integers ? 1994 glsl_type::int_type : glsl_type::float_type); 1995 1996 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), 1997 index_reg, *src.reladdr); 1998 1999 index_reg = accum_reg; 2000 } 2001 2002 src.reladdr = ralloc(mem_ctx, st_src_reg); 2003 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 2004 } 2005 2006 /* If the type is smaller than a vec4, replicate the last channel out. 
*/ 2007 if (ir->type->is_scalar() || ir->type->is_vector()) 2008 src.swizzle = swizzle_for_size(ir->type->vector_elements); 2009 else 2010 src.swizzle = SWIZZLE_NOOP; 2011 2012 this->result = src; 2013} 2014 2015void 2016glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 2017{ 2018 unsigned int i; 2019 const glsl_type *struct_type = ir->record->type; 2020 int offset = 0; 2021 2022 ir->record->accept(this); 2023 2024 for (i = 0; i < struct_type->length; i++) { 2025 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 2026 break; 2027 offset += type_size(struct_type->fields.structure[i].type); 2028 } 2029 2030 /* If the type is smaller than a vec4, replicate the last channel out. */ 2031 if (ir->type->is_scalar() || ir->type->is_vector()) 2032 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 2033 else 2034 this->result.swizzle = SWIZZLE_NOOP; 2035 2036 this->result.index += offset; 2037} 2038 2039/** 2040 * We want to be careful in assignment setup to hit the actual storage 2041 * instead of potentially using a temporary like we might with the 2042 * ir_dereference handler. 2043 */ 2044static st_dst_reg 2045get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) 2046{ 2047 /* The LHS must be a dereference. If the LHS is a variable indexed array 2048 * access of a vector, it must be separated into a series conditional moves 2049 * before reaching this point (see ir_vec_index_to_cond_assign). 2050 */ 2051 assert(ir->as_dereference()); 2052 ir_dereference_array *deref_array = ir->as_dereference_array(); 2053 if (deref_array) { 2054 assert(!deref_array->array->type->is_vector()); 2055 } 2056 2057 /* Use the rvalue deref handler for the most part. We'll ignore 2058 * swizzles in it and write swizzles using writemask, though. 
2059 */ 2060 ir->accept(v); 2061 return st_dst_reg(v->result); 2062} 2063 2064/** 2065 * Process the condition of a conditional assignment 2066 * 2067 * Examines the condition of a conditional assignment to generate the optimal 2068 * first operand of a \c CMP instruction. If the condition is a relational 2069 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 2070 * used as the source for the \c CMP instruction. Otherwise the comparison 2071 * is processed to a boolean result, and the boolean result is used as the 2072 * operand to the CMP instruction. 2073 */ 2074bool 2075glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 2076{ 2077 ir_rvalue *src_ir = ir; 2078 bool negate = true; 2079 bool switch_order = false; 2080 2081 ir_expression *const expr = ir->as_expression(); 2082 if ((expr != NULL) && (expr->get_num_operands() == 2)) { 2083 bool zero_on_left = false; 2084 2085 if (expr->operands[0]->is_zero()) { 2086 src_ir = expr->operands[1]; 2087 zero_on_left = true; 2088 } else if (expr->operands[1]->is_zero()) { 2089 src_ir = expr->operands[0]; 2090 zero_on_left = false; 2091 } 2092 2093 /* a is - 0 + - 0 + 2094 * (a < 0) T F F ( a < 0) T F F 2095 * (0 < a) F F T (-a < 0) F F T 2096 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) 2097 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) 2098 * (a > 0) F F T (-a < 0) F F T 2099 * (0 > a) T F F ( a < 0) T F F 2100 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 2101 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 2102 * 2103 * Note that exchanging the order of 0 and 'a' in the comparison simply 2104 * means that the value of 'a' should be negated. 
2105 */ 2106 if (src_ir != ir) { 2107 switch (expr->operation) { 2108 case ir_binop_less: 2109 switch_order = false; 2110 negate = zero_on_left; 2111 break; 2112 2113 case ir_binop_greater: 2114 switch_order = false; 2115 negate = !zero_on_left; 2116 break; 2117 2118 case ir_binop_lequal: 2119 switch_order = true; 2120 negate = !zero_on_left; 2121 break; 2122 2123 case ir_binop_gequal: 2124 switch_order = true; 2125 negate = zero_on_left; 2126 break; 2127 2128 default: 2129 /* This isn't the right kind of comparison afterall, so make sure 2130 * the whole condition is visited. 2131 */ 2132 src_ir = ir; 2133 break; 2134 } 2135 } 2136 } 2137 2138 src_ir->accept(this); 2139 2140 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 2141 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 2142 * choose which value TGSI_OPCODE_CMP produces without an extra instruction 2143 * computing the condition. 2144 */ 2145 if (negate) 2146 this->result.negate = ~this->result.negate; 2147 2148 return switch_order; 2149} 2150 2151void 2152glsl_to_tgsi_visitor::visit(ir_assignment *ir) 2153{ 2154 st_dst_reg l; 2155 st_src_reg r; 2156 int i; 2157 2158 ir->rhs->accept(this); 2159 r = this->result; 2160 2161 l = get_assignment_lhs(ir->lhs, this); 2162 2163 /* FINISHME: This should really set to the correct maximal writemask for each 2164 * FINISHME: component written (in the loops below). This case can only 2165 * FINISHME: occur for matrices, arrays, and structures. 2166 */ 2167 if (ir->write_mask == 0) { 2168 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 2169 l.writemask = WRITEMASK_XYZW; 2170 } else if (ir->lhs->type->is_scalar() && 2171 ir->lhs->variable_referenced()->mode == ir_var_out) { 2172 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 2173 * FINISHME: W component of fragment shader output zero, work correctly. 
2174 */ 2175 l.writemask = WRITEMASK_XYZW; 2176 } else { 2177 int swizzles[4]; 2178 int first_enabled_chan = 0; 2179 int rhs_chan = 0; 2180 2181 l.writemask = ir->write_mask; 2182 2183 for (int i = 0; i < 4; i++) { 2184 if (l.writemask & (1 << i)) { 2185 first_enabled_chan = GET_SWZ(r.swizzle, i); 2186 break; 2187 } 2188 } 2189 2190 /* Swizzle a small RHS vector into the channels being written. 2191 * 2192 * glsl ir treats write_mask as dictating how many channels are 2193 * present on the RHS while TGSI treats write_mask as just 2194 * showing which channels of the vec4 RHS get written. 2195 */ 2196 for (int i = 0; i < 4; i++) { 2197 if (l.writemask & (1 << i)) 2198 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 2199 else 2200 swizzles[i] = first_enabled_chan; 2201 } 2202 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 2203 swizzles[2], swizzles[3]); 2204 } 2205 2206 assert(l.file != PROGRAM_UNDEFINED); 2207 assert(r.file != PROGRAM_UNDEFINED); 2208 2209 if (ir->condition) { 2210 const bool switch_order = this->process_move_condition(ir->condition); 2211 st_src_reg condition = this->result; 2212 2213 for (i = 0; i < type_size(ir->lhs->type); i++) { 2214 st_src_reg l_src = st_src_reg(l); 2215 st_src_reg condition_temp = condition; 2216 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); 2217 2218 if (native_integers) { 2219 /* This is necessary because TGSI's CMP instruction expects the 2220 * condition to be a float, and we store booleans as integers. 2221 * If TGSI had a UCMP instruction or similar, this extra 2222 * instruction would not be necessary. 
2223 */ 2224 condition_temp = get_temp(glsl_type::vec4_type); 2225 condition.negate = 0; 2226 emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition); 2227 condition_temp.swizzle = condition.swizzle; 2228 } 2229 2230 if (switch_order) { 2231 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r); 2232 } else { 2233 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src); 2234 } 2235 2236 l.index++; 2237 r.index++; 2238 } 2239 } else if (ir->rhs->as_expression() && 2240 this->instructions.get_tail() && 2241 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 2242 type_size(ir->lhs->type) == 1 && 2243 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { 2244 /* To avoid emitting an extra MOV when assigning an expression to a 2245 * variable, emit the last instruction of the expression again, but 2246 * replace the destination register with the target of the assignment. 2247 * Dead code elimination will remove the original instruction. 2248 */ 2249 glsl_to_tgsi_instruction *inst, *new_inst; 2250 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2251 new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); 2252 new_inst->saturate = inst->saturate; 2253 inst->dead_mask = inst->dst.writemask; 2254 } else { 2255 for (i = 0; i < type_size(ir->lhs->type); i++) { 2256 emit(ir, TGSI_OPCODE_MOV, l, r); 2257 l.index++; 2258 r.index++; 2259 } 2260 } 2261} 2262 2263 2264void 2265glsl_to_tgsi_visitor::visit(ir_constant *ir) 2266{ 2267 st_src_reg src; 2268 GLfloat stack_vals[4] = { 0 }; 2269 gl_constant_value *values = (gl_constant_value *) stack_vals; 2270 GLenum gl_type = GL_NONE; 2271 unsigned int i; 2272 static int in_array = 0; 2273 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 2274 2275 /* Unfortunately, 4 floats is all we can get into 2276 * _mesa_add_typed_unnamed_constant. 
So, make a temp to store an 2277 * aggregate constant and move each constant value into it. If we 2278 * get lucky, copy propagation will eliminate the extra moves. 2279 */ 2280 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 2281 st_src_reg temp_base = get_temp(ir->type); 2282 st_dst_reg temp = st_dst_reg(temp_base); 2283 2284 foreach_iter(exec_list_iterator, iter, ir->components) { 2285 ir_constant *field_value = (ir_constant *)iter.get(); 2286 int size = type_size(field_value->type); 2287 2288 assert(size > 0); 2289 2290 field_value->accept(this); 2291 src = this->result; 2292 2293 for (i = 0; i < (unsigned int)size; i++) { 2294 emit(ir, TGSI_OPCODE_MOV, temp, src); 2295 2296 src.index++; 2297 temp.index++; 2298 } 2299 } 2300 this->result = temp_base; 2301 return; 2302 } 2303 2304 if (ir->type->is_array()) { 2305 st_src_reg temp_base = get_temp(ir->type); 2306 st_dst_reg temp = st_dst_reg(temp_base); 2307 int size = type_size(ir->type->fields.array); 2308 2309 assert(size > 0); 2310 in_array++; 2311 2312 for (i = 0; i < ir->type->length; i++) { 2313 ir->array_elements[i]->accept(this); 2314 src = this->result; 2315 for (int j = 0; j < size; j++) { 2316 emit(ir, TGSI_OPCODE_MOV, temp, src); 2317 2318 src.index++; 2319 temp.index++; 2320 } 2321 } 2322 this->result = temp_base; 2323 in_array--; 2324 return; 2325 } 2326 2327 if (ir->type->is_matrix()) { 2328 st_src_reg mat = get_temp(ir->type); 2329 st_dst_reg mat_column = st_dst_reg(mat); 2330 2331 for (i = 0; i < ir->type->matrix_columns; i++) { 2332 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 2333 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 2334 2335 src = st_src_reg(file, -1, ir->type->base_type); 2336 src.index = add_constant(file, 2337 values, 2338 ir->type->vector_elements, 2339 GL_FLOAT, 2340 &src.swizzle); 2341 emit(ir, TGSI_OPCODE_MOV, mat_column, src); 2342 2343 mat_column.index++; 2344 } 2345 2346 this->result = mat; 2347 return; 2348 } 2349 2350 switch 
(ir->type->base_type) { 2351 case GLSL_TYPE_FLOAT: 2352 gl_type = GL_FLOAT; 2353 for (i = 0; i < ir->type->vector_elements; i++) { 2354 values[i].f = ir->value.f[i]; 2355 } 2356 break; 2357 case GLSL_TYPE_UINT: 2358 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; 2359 for (i = 0; i < ir->type->vector_elements; i++) { 2360 if (native_integers) 2361 values[i].u = ir->value.u[i]; 2362 else 2363 values[i].f = ir->value.u[i]; 2364 } 2365 break; 2366 case GLSL_TYPE_INT: 2367 gl_type = native_integers ? GL_INT : GL_FLOAT; 2368 for (i = 0; i < ir->type->vector_elements; i++) { 2369 if (native_integers) 2370 values[i].i = ir->value.i[i]; 2371 else 2372 values[i].f = ir->value.i[i]; 2373 } 2374 break; 2375 case GLSL_TYPE_BOOL: 2376 gl_type = native_integers ? GL_BOOL : GL_FLOAT; 2377 for (i = 0; i < ir->type->vector_elements; i++) { 2378 if (native_integers) 2379 values[i].b = ir->value.b[i]; 2380 else 2381 values[i].f = ir->value.b[i]; 2382 } 2383 break; 2384 default: 2385 assert(!"Non-float/uint/int/bool constant"); 2386 } 2387 2388 this->result = st_src_reg(file, -1, ir->type); 2389 this->result.index = add_constant(file, 2390 values, 2391 ir->type->vector_elements, 2392 gl_type, 2393 &this->result.swizzle); 2394} 2395 2396function_entry * 2397glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) 2398{ 2399 function_entry *entry; 2400 2401 foreach_iter(exec_list_iterator, iter, this->function_signatures) { 2402 entry = (function_entry *)iter.get(); 2403 2404 if (entry->sig == sig) 2405 return entry; 2406 } 2407 2408 entry = ralloc(mem_ctx, function_entry); 2409 entry->sig = sig; 2410 entry->sig_id = this->next_signature_id++; 2411 entry->bgn_inst = NULL; 2412 2413 /* Allocate storage for all the parameters. 
*/ 2414 foreach_iter(exec_list_iterator, iter, sig->parameters) { 2415 ir_variable *param = (ir_variable *)iter.get(); 2416 variable_storage *storage; 2417 2418 storage = find_variable_storage(param); 2419 assert(!storage); 2420 2421 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, 2422 this->next_temp); 2423 this->variables.push_tail(storage); 2424 2425 this->next_temp += type_size(param->type); 2426 } 2427 2428 if (!sig->return_type->is_void()) { 2429 entry->return_reg = get_temp(sig->return_type); 2430 } else { 2431 entry->return_reg = undef_src; 2432 } 2433 2434 this->function_signatures.push_tail(entry); 2435 return entry; 2436} 2437 2438void 2439glsl_to_tgsi_visitor::visit(ir_call *ir) 2440{ 2441 glsl_to_tgsi_instruction *call_inst; 2442 ir_function_signature *sig = ir->get_callee(); 2443 function_entry *entry = get_function_signature(sig); 2444 int i; 2445 2446 /* Process in parameters. */ 2447 exec_list_iterator sig_iter = sig->parameters.iterator(); 2448 foreach_iter(exec_list_iterator, iter, *ir) { 2449 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2450 ir_variable *param = (ir_variable *)sig_iter.get(); 2451 2452 if (param->mode == ir_var_in || 2453 param->mode == ir_var_inout) { 2454 variable_storage *storage = find_variable_storage(param); 2455 assert(storage); 2456 2457 param_rval->accept(this); 2458 st_src_reg r = this->result; 2459 2460 st_dst_reg l; 2461 l.file = storage->file; 2462 l.index = storage->index; 2463 l.reladdr = NULL; 2464 l.writemask = WRITEMASK_XYZW; 2465 l.cond_mask = COND_TR; 2466 2467 for (i = 0; i < type_size(param->type); i++) { 2468 emit(ir, TGSI_OPCODE_MOV, l, r); 2469 l.index++; 2470 r.index++; 2471 } 2472 } 2473 2474 sig_iter.next(); 2475 } 2476 assert(!sig_iter.has_next()); 2477 2478 /* Emit call instruction */ 2479 call_inst = emit(ir, TGSI_OPCODE_CAL); 2480 call_inst->function = entry; 2481 2482 /* Process out parameters. 
*/ 2483 sig_iter = sig->parameters.iterator(); 2484 foreach_iter(exec_list_iterator, iter, *ir) { 2485 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2486 ir_variable *param = (ir_variable *)sig_iter.get(); 2487 2488 if (param->mode == ir_var_out || 2489 param->mode == ir_var_inout) { 2490 variable_storage *storage = find_variable_storage(param); 2491 assert(storage); 2492 2493 st_src_reg r; 2494 r.file = storage->file; 2495 r.index = storage->index; 2496 r.reladdr = NULL; 2497 r.swizzle = SWIZZLE_NOOP; 2498 r.negate = 0; 2499 2500 param_rval->accept(this); 2501 st_dst_reg l = st_dst_reg(this->result); 2502 2503 for (i = 0; i < type_size(param->type); i++) { 2504 emit(ir, TGSI_OPCODE_MOV, l, r); 2505 l.index++; 2506 r.index++; 2507 } 2508 } 2509 2510 sig_iter.next(); 2511 } 2512 assert(!sig_iter.has_next()); 2513 2514 /* Process return value. */ 2515 this->result = entry->return_reg; 2516} 2517 2518void 2519glsl_to_tgsi_visitor::visit(ir_texture *ir) 2520{ 2521 st_src_reg result_src, coord, lod_info, projector, dx, dy, offset; 2522 st_dst_reg result_dst, coord_dst; 2523 glsl_to_tgsi_instruction *inst = NULL; 2524 unsigned opcode = TGSI_OPCODE_NOP; 2525 2526 if (ir->coordinate) { 2527 ir->coordinate->accept(this); 2528 2529 /* Put our coords in a temp. We'll need to modify them for shadow, 2530 * projection, or LOD, so the only case we'd use it as is is if 2531 * we're doing plain old texturing. The optimization passes on 2532 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 2533 */ 2534 coord = get_temp(glsl_type::vec4_type); 2535 coord_dst = st_dst_reg(coord); 2536 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2537 } 2538 2539 if (ir->projector) { 2540 ir->projector->accept(this); 2541 projector = this->result; 2542 } 2543 2544 /* Storage for our result. Ideally for an assignment we'd be using 2545 * the actual storage for the result here, instead. 
2546 */ 2547 result_src = get_temp(glsl_type::vec4_type); 2548 result_dst = st_dst_reg(result_src); 2549 2550 switch (ir->op) { 2551 case ir_tex: 2552 opcode = TGSI_OPCODE_TEX; 2553 break; 2554 case ir_txb: 2555 opcode = TGSI_OPCODE_TXB; 2556 ir->lod_info.bias->accept(this); 2557 lod_info = this->result; 2558 break; 2559 case ir_txl: 2560 opcode = TGSI_OPCODE_TXL; 2561 ir->lod_info.lod->accept(this); 2562 lod_info = this->result; 2563 break; 2564 case ir_txd: 2565 opcode = TGSI_OPCODE_TXD; 2566 ir->lod_info.grad.dPdx->accept(this); 2567 dx = this->result; 2568 ir->lod_info.grad.dPdy->accept(this); 2569 dy = this->result; 2570 break; 2571 case ir_txs: 2572 opcode = TGSI_OPCODE_TXQ; 2573 ir->lod_info.lod->accept(this); 2574 lod_info = this->result; 2575 break; 2576 case ir_txf: 2577 opcode = TGSI_OPCODE_TXF; 2578 ir->lod_info.lod->accept(this); 2579 lod_info = this->result; 2580 if (ir->offset) { 2581 ir->offset->accept(this); 2582 offset = this->result; 2583 } 2584 break; 2585 } 2586 2587 const glsl_type *sampler_type = ir->sampler->type; 2588 2589 if (ir->projector) { 2590 if (opcode == TGSI_OPCODE_TEX) { 2591 /* Slot the projector in as the last component of the coord. */ 2592 coord_dst.writemask = WRITEMASK_W; 2593 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); 2594 coord_dst.writemask = WRITEMASK_XYZW; 2595 opcode = TGSI_OPCODE_TXP; 2596 } else { 2597 st_src_reg coord_w = coord; 2598 coord_w.swizzle = SWIZZLE_WWWW; 2599 2600 /* For the other TEX opcodes there's no projective version 2601 * since the last slot is taken up by LOD info. Do the 2602 * projective divide now. 2603 */ 2604 coord_dst.writemask = WRITEMASK_W; 2605 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); 2606 2607 /* In the case where we have to project the coordinates "by hand," 2608 * the shadow comparator value must also be projected. 
2609 */ 2610 st_src_reg tmp_src = coord; 2611 if (ir->shadow_comparitor) { 2612 /* Slot the shadow value in as the second to last component of the 2613 * coord. 2614 */ 2615 ir->shadow_comparitor->accept(this); 2616 2617 tmp_src = get_temp(glsl_type::vec4_type); 2618 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 2619 2620 /* Projective division not allowed for array samplers. */ 2621 assert(!sampler_type->sampler_array); 2622 2623 tmp_dst.writemask = WRITEMASK_Z; 2624 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 2625 2626 tmp_dst.writemask = WRITEMASK_XY; 2627 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 2628 } 2629 2630 coord_dst.writemask = WRITEMASK_XYZ; 2631 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 2632 2633 coord_dst.writemask = WRITEMASK_XYZW; 2634 coord.swizzle = SWIZZLE_XYZW; 2635 } 2636 } 2637 2638 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 2639 * comparator was put in the correct place (and projected) by the code, 2640 * above, that handles by-hand projection. 2641 */ 2642 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 2643 /* Slot the shadow value in as the second to last component of the 2644 * coord. 2645 */ 2646 ir->shadow_comparitor->accept(this); 2647 2648 /* XXX This will need to be updated for cubemap array samplers. */ 2649 if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2650 sampler_type->sampler_array) { 2651 coord_dst.writemask = WRITEMASK_W; 2652 } else { 2653 coord_dst.writemask = WRITEMASK_Z; 2654 } 2655 2656 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2657 coord_dst.writemask = WRITEMASK_XYZW; 2658 } 2659 2660 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || 2661 opcode == TGSI_OPCODE_TXF) { 2662 /* TGSI stores LOD or LOD bias in the last channel of the coords. 
*/ 2663 coord_dst.writemask = WRITEMASK_W; 2664 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 2665 coord_dst.writemask = WRITEMASK_XYZW; 2666 } 2667 2668 if (opcode == TGSI_OPCODE_TXD) 2669 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2670 else if (opcode == TGSI_OPCODE_TXQ) 2671 inst = emit(ir, opcode, result_dst, lod_info); 2672 else if (opcode == TGSI_OPCODE_TXF) { 2673 inst = emit(ir, opcode, result_dst, coord); 2674 } else 2675 inst = emit(ir, opcode, result_dst, coord); 2676 2677 if (ir->shadow_comparitor) 2678 inst->tex_shadow = GL_TRUE; 2679 2680 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2681 this->shader_program, 2682 this->prog); 2683 2684 if (ir->offset) { 2685 inst->tex_offset_num_offset = 1; 2686 inst->tex_offsets[0].Index = offset.index; 2687 inst->tex_offsets[0].File = offset.file; 2688 inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0); 2689 inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1); 2690 inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2); 2691 } 2692 2693 switch (sampler_type->sampler_dimensionality) { 2694 case GLSL_SAMPLER_DIM_1D: 2695 inst->tex_target = (sampler_type->sampler_array) 2696 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2697 break; 2698 case GLSL_SAMPLER_DIM_2D: 2699 inst->tex_target = (sampler_type->sampler_array) 2700 ? 
TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2701 break; 2702 case GLSL_SAMPLER_DIM_3D: 2703 inst->tex_target = TEXTURE_3D_INDEX; 2704 break; 2705 case GLSL_SAMPLER_DIM_CUBE: 2706 inst->tex_target = TEXTURE_CUBE_INDEX; 2707 break; 2708 case GLSL_SAMPLER_DIM_RECT: 2709 inst->tex_target = TEXTURE_RECT_INDEX; 2710 break; 2711 case GLSL_SAMPLER_DIM_BUF: 2712 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2713 break; 2714 case GLSL_SAMPLER_DIM_EXTERNAL: 2715 inst->tex_target = TEXTURE_EXTERNAL_INDEX; 2716 break; 2717 default: 2718 assert(!"Should not get here."); 2719 } 2720 2721 this->result = result_src; 2722} 2723 2724void 2725glsl_to_tgsi_visitor::visit(ir_return *ir) 2726{ 2727 if (ir->get_value()) { 2728 st_dst_reg l; 2729 int i; 2730 2731 assert(current_function); 2732 2733 ir->get_value()->accept(this); 2734 st_src_reg r = this->result; 2735 2736 l = st_dst_reg(current_function->return_reg); 2737 2738 for (i = 0; i < type_size(current_function->sig->return_type); i++) { 2739 emit(ir, TGSI_OPCODE_MOV, l, r); 2740 l.index++; 2741 r.index++; 2742 } 2743 } 2744 2745 emit(ir, TGSI_OPCODE_RET); 2746} 2747 2748void 2749glsl_to_tgsi_visitor::visit(ir_discard *ir) 2750{ 2751 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 2752 2753 if (ir->condition) { 2754 ir->condition->accept(this); 2755 this->result.negate = ~this->result.negate; 2756 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); 2757 } else { 2758 emit(ir, TGSI_OPCODE_KILP); 2759 } 2760 2761 fp->UsesKill = GL_TRUE; 2762} 2763 2764void 2765glsl_to_tgsi_visitor::visit(ir_if *ir) 2766{ 2767 glsl_to_tgsi_instruction *cond_inst, *if_inst; 2768 glsl_to_tgsi_instruction *prev_inst; 2769 2770 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2771 2772 ir->condition->accept(this); 2773 assert(this->result.file != PROGRAM_UNDEFINED); 2774 2775 if (this->options->EmitCondCodes) { 2776 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2777 2778 
/* See if we actually generated any instruction for generating 2779 * the condition. If not, then cook up a move to a temp so we 2780 * have something to set cond_update on. 2781 */ 2782 if (cond_inst == prev_inst) { 2783 st_src_reg temp = get_temp(glsl_type::bool_type); 2784 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); 2785 } 2786 cond_inst->cond_update = GL_TRUE; 2787 2788 if_inst = emit(ir->condition, TGSI_OPCODE_IF); 2789 if_inst->dst.cond_mask = COND_NE; 2790 } else { 2791 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); 2792 } 2793 2794 this->instructions.push_tail(if_inst); 2795 2796 visit_exec_list(&ir->then_instructions, this); 2797 2798 if (!ir->else_instructions.is_empty()) { 2799 emit(ir->condition, TGSI_OPCODE_ELSE); 2800 visit_exec_list(&ir->else_instructions, this); 2801 } 2802 2803 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); 2804} 2805 2806glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 2807{ 2808 result.file = PROGRAM_UNDEFINED; 2809 next_temp = 1; 2810 next_signature_id = 1; 2811 num_immediates = 0; 2812 current_function = NULL; 2813 num_address_regs = 0; 2814 indirect_addr_temps = false; 2815 indirect_addr_consts = false; 2816 mem_ctx = ralloc_context(NULL); 2817} 2818 2819glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 2820{ 2821 ralloc_free(mem_ctx); 2822} 2823 2824extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 2825{ 2826 delete v; 2827} 2828 2829 2830/** 2831 * Count resources used by the given gpu program (number of texture 2832 * samplers, etc). 
2833 */ 2834static void 2835count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2836{ 2837 v->samplers_used = 0; 2838 2839 foreach_iter(exec_list_iterator, iter, v->instructions) { 2840 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2841 2842 if (is_tex_instruction(inst->op)) { 2843 v->samplers_used |= 1 << inst->sampler; 2844 2845 prog->SamplerTargets[inst->sampler] = 2846 (gl_texture_index)inst->tex_target; 2847 if (inst->tex_shadow) { 2848 prog->ShadowSamplers |= 1 << inst->sampler; 2849 } 2850 } 2851 } 2852 2853 prog->SamplersUsed = v->samplers_used; 2854 _mesa_update_shader_textures_used(prog); 2855} 2856 2857static void 2858set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 2859 struct gl_shader_program *shader_program, 2860 const char *name, const glsl_type *type, 2861 ir_constant *val) 2862{ 2863 if (type->is_record()) { 2864 ir_constant *field_constant; 2865 2866 field_constant = (ir_constant *)val->components.get_head(); 2867 2868 for (unsigned int i = 0; i < type->length; i++) { 2869 const glsl_type *field_type = type->fields.structure[i].type; 2870 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 2871 type->fields.structure[i].name); 2872 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 2873 field_type, field_constant); 2874 field_constant = (ir_constant *)field_constant->next; 2875 } 2876 return; 2877 } 2878 2879 int loc = _mesa_get_uniform_location(ctx, shader_program, name); 2880 2881 if (loc == -1) { 2882 fail_link(shader_program, 2883 "Couldn't find uniform for initializer %s\n", name); 2884 return; 2885 } 2886 2887 for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { 2888 ir_constant *element; 2889 const glsl_type *element_type; 2890 if (type->is_array()) { 2891 element = val->array_elements[i]; 2892 element_type = type->fields.array; 2893 } else { 2894 element = val; 2895 element_type = type; 2896 } 2897 2898 void *values; 2899 2900 if (element_type->base_type == GLSL_TYPE_BOOL) { 2901 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 2902 for (unsigned int j = 0; j < element_type->components(); j++) { 2903 conv[j] = element->value.b[j]; 2904 } 2905 values = (void *)conv; 2906 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 2907 element_type->vector_elements, 2908 1); 2909 } else { 2910 values = &element->value; 2911 } 2912 2913 if (element_type->is_matrix()) { 2914 _mesa_uniform_matrix(ctx, shader_program, 2915 element_type->matrix_columns, 2916 element_type->vector_elements, 2917 loc, 1, GL_FALSE, (GLfloat *)values); 2918 } else { 2919 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 2920 values, element_type->gl_type); 2921 } 2922 2923 loc++; 2924 } 2925} 2926 2927/* 2928 * Scan/rewrite program to remove reads of custom (output) registers. 2929 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING 2930 * (for vertex shaders). 2931 * In GLSL shaders, varying vars can be read and written. 2932 * On some hardware, trying to read an output register causes trouble. 2933 * So, rewrite the program to use a temporary register in this case. 2934 * 2935 * Based on _mesa_remove_output_reads from programopt.c. 
2936 */ 2937void 2938glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) 2939{ 2940 GLuint i; 2941 GLint outputMap[VERT_RESULT_MAX]; 2942 GLint outputTypes[VERT_RESULT_MAX]; 2943 GLuint numVaryingReads = 0; 2944 GLboolean *usedTemps; 2945 GLuint firstTemp = 0; 2946 2947 usedTemps = new GLboolean[MAX_TEMPS]; 2948 if (!usedTemps) { 2949 return; 2950 } 2951 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, 2952 usedTemps, MAX_TEMPS); 2953 2954 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); 2955 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); 2956 2957 for (i = 0; i < VERT_RESULT_MAX; i++) 2958 outputMap[i] = -1; 2959 2960 /* look for instructions which read from varying vars */ 2961 foreach_iter(exec_list_iterator, iter, this->instructions) { 2962 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2963 const GLuint numSrc = num_inst_src_regs(inst->op); 2964 GLuint j; 2965 for (j = 0; j < numSrc; j++) { 2966 if (inst->src[j].file == type) { 2967 /* replace the read with a temp reg */ 2968 const GLuint var = inst->src[j].index; 2969 if (outputMap[var] == -1) { 2970 numVaryingReads++; 2971 outputMap[var] = _mesa_find_free_register(usedTemps, 2972 MAX_TEMPS, 2973 firstTemp); 2974 outputTypes[var] = inst->src[j].type; 2975 firstTemp = outputMap[var] + 1; 2976 } 2977 inst->src[j].file = PROGRAM_TEMPORARY; 2978 inst->src[j].index = outputMap[var]; 2979 } 2980 } 2981 } 2982 2983 delete [] usedTemps; 2984 2985 if (numVaryingReads == 0) 2986 return; /* nothing to be done */ 2987 2988 /* look for instructions which write to the varying vars identified above */ 2989 foreach_iter(exec_list_iterator, iter, this->instructions) { 2990 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2991 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { 2992 /* change inst to write to the temp reg, instead of the varying */ 2993 inst->dst.file = PROGRAM_TEMPORARY; 2994 inst->dst.index = 
outputMap[inst->dst.index]; 2995 } 2996 } 2997 2998 /* insert new MOV instructions at the end */ 2999 for (i = 0; i < VERT_RESULT_MAX; i++) { 3000 if (outputMap[i] >= 0) { 3001 /* MOV VAR[i], TEMP[tmp]; */ 3002 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); 3003 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); 3004 dst.index = i; 3005 this->emit(NULL, TGSI_OPCODE_MOV, dst, src); 3006 } 3007 } 3008} 3009 3010/** 3011 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 3012 * are read from the given src in this instruction 3013 */ 3014static int 3015get_src_arg_mask(st_dst_reg dst, st_src_reg src) 3016{ 3017 int read_mask = 0, comp; 3018 3019 /* Now, given the src swizzle and the written channels, find which 3020 * components are actually read 3021 */ 3022 for (comp = 0; comp < 4; ++comp) { 3023 const unsigned coord = GET_SWZ(src.swizzle, comp); 3024 ASSERT(coord < 4); 3025 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 3026 read_mask |= 1 << coord; 3027 } 3028 3029 return read_mask; 3030} 3031 3032/** 3033 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 3034 * instruction is the first instruction to write to register T0. There are 3035 * several lowering passes done in GLSL IR (e.g. branches and 3036 * relative addressing) that create a large number of conditional assignments 3037 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 3038 * 3039 * Here is why this conversion is safe: 3040 * CMP T0, T1 T2 T0 can be expanded to: 3041 * if (T1 < 0.0) 3042 * MOV T0, T2; 3043 * else 3044 * MOV T0, T0; 3045 * 3046 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 3047 * as the original program. If (T1 < 0.0) evaluates to false, executing 3048 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 
3049 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 3050 * because any instruction that was going to read from T0 after this was going 3051 * to read a garbage value anyway. 3052 */ 3053void 3054glsl_to_tgsi_visitor::simplify_cmp(void) 3055{ 3056 unsigned *tempWrites; 3057 unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; 3058 3059 tempWrites = new unsigned[MAX_TEMPS]; 3060 if (!tempWrites) { 3061 return; 3062 } 3063 memset(tempWrites, 0, sizeof(tempWrites)); 3064 memset(outputWrites, 0, sizeof(outputWrites)); 3065 3066 foreach_iter(exec_list_iterator, iter, this->instructions) { 3067 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3068 unsigned prevWriteMask = 0; 3069 3070 /* Give up if we encounter relative addressing or flow control. */ 3071 if (inst->dst.reladdr || 3072 tgsi_get_opcode_info(inst->op)->is_branch || 3073 inst->op == TGSI_OPCODE_BGNSUB || 3074 inst->op == TGSI_OPCODE_CONT || 3075 inst->op == TGSI_OPCODE_END || 3076 inst->op == TGSI_OPCODE_ENDSUB || 3077 inst->op == TGSI_OPCODE_RET) { 3078 break; 3079 } 3080 3081 if (inst->dst.file == PROGRAM_OUTPUT) { 3082 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); 3083 prevWriteMask = outputWrites[inst->dst.index]; 3084 outputWrites[inst->dst.index] |= inst->dst.writemask; 3085 } else if (inst->dst.file == PROGRAM_TEMPORARY) { 3086 assert(inst->dst.index < MAX_TEMPS); 3087 prevWriteMask = tempWrites[inst->dst.index]; 3088 tempWrites[inst->dst.index] |= inst->dst.writemask; 3089 } 3090 3091 /* For a CMP to be considered a conditional write, the destination 3092 * register and source register two must be the same. 
*/ 3093 if (inst->op == TGSI_OPCODE_CMP 3094 && !(inst->dst.writemask & prevWriteMask) 3095 && inst->src[2].file == inst->dst.file 3096 && inst->src[2].index == inst->dst.index 3097 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { 3098 3099 inst->op = TGSI_OPCODE_MOV; 3100 inst->src[0] = inst->src[1]; 3101 } 3102 } 3103 3104 delete [] tempWrites; 3105} 3106 3107/* Replaces all references to a temporary register index with another index. */ 3108void 3109glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) 3110{ 3111 foreach_iter(exec_list_iterator, iter, this->instructions) { 3112 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3113 unsigned j; 3114 3115 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3116 if (inst->src[j].file == PROGRAM_TEMPORARY && 3117 inst->src[j].index == index) { 3118 inst->src[j].index = new_index; 3119 } 3120 } 3121 3122 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3123 inst->dst.index = new_index; 3124 } 3125 } 3126} 3127 3128int 3129glsl_to_tgsi_visitor::get_first_temp_read(int index) 3130{ 3131 int depth = 0; /* loop depth */ 3132 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3133 unsigned i = 0, j; 3134 3135 foreach_iter(exec_list_iterator, iter, this->instructions) { 3136 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3137 3138 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3139 if (inst->src[j].file == PROGRAM_TEMPORARY && 3140 inst->src[j].index == index) { 3141 return (depth == 0) ? 
i : loop_start; 3142 } 3143 } 3144 3145 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3146 if(depth++ == 0) 3147 loop_start = i; 3148 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3149 if (--depth == 0) 3150 loop_start = -1; 3151 } 3152 assert(depth >= 0); 3153 3154 i++; 3155 } 3156 3157 return -1; 3158} 3159 3160int 3161glsl_to_tgsi_visitor::get_first_temp_write(int index) 3162{ 3163 int depth = 0; /* loop depth */ 3164 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3165 int i = 0; 3166 3167 foreach_iter(exec_list_iterator, iter, this->instructions) { 3168 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3169 3170 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3171 return (depth == 0) ? i : loop_start; 3172 } 3173 3174 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3175 if(depth++ == 0) 3176 loop_start = i; 3177 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3178 if (--depth == 0) 3179 loop_start = -1; 3180 } 3181 assert(depth >= 0); 3182 3183 i++; 3184 } 3185 3186 return -1; 3187} 3188 3189int 3190glsl_to_tgsi_visitor::get_last_temp_read(int index) 3191{ 3192 int depth = 0; /* loop depth */ 3193 int last = -1; /* index of last instruction that reads the temporary */ 3194 unsigned i = 0, j; 3195 3196 foreach_iter(exec_list_iterator, iter, this->instructions) { 3197 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3198 3199 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3200 if (inst->src[j].file == PROGRAM_TEMPORARY && 3201 inst->src[j].index == index) { 3202 last = (depth == 0) ? 
i : -2; 3203 } 3204 } 3205 3206 if (inst->op == TGSI_OPCODE_BGNLOOP) 3207 depth++; 3208 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3209 if (--depth == 0 && last == -2) 3210 last = i; 3211 assert(depth >= 0); 3212 3213 i++; 3214 } 3215 3216 assert(last >= -1); 3217 return last; 3218} 3219 3220int 3221glsl_to_tgsi_visitor::get_last_temp_write(int index) 3222{ 3223 int depth = 0; /* loop depth */ 3224 int last = -1; /* index of last instruction that writes to the temporary */ 3225 int i = 0; 3226 3227 foreach_iter(exec_list_iterator, iter, this->instructions) { 3228 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3229 3230 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) 3231 last = (depth == 0) ? i : -2; 3232 3233 if (inst->op == TGSI_OPCODE_BGNLOOP) 3234 depth++; 3235 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3236 if (--depth == 0 && last == -2) 3237 last = i; 3238 assert(depth >= 0); 3239 3240 i++; 3241 } 3242 3243 assert(last >= -1); 3244 return last; 3245} 3246 3247/* 3248 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 3249 * channels for copy propagation and updates following instructions to 3250 * use the original versions. 3251 * 3252 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3253 * will occur. As an example, a TXP production before this pass: 3254 * 3255 * 0: MOV TEMP[1], INPUT[4].xyyy; 3256 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3257 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 3258 * 3259 * and after: 3260 * 3261 * 0: MOV TEMP[1], INPUT[4].xyyy; 3262 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3263 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3264 * 3265 * which allows for dead code elimination on TEMP[1]'s writes. 
3266 */ 3267void 3268glsl_to_tgsi_visitor::copy_propagate(void) 3269{ 3270 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 3271 glsl_to_tgsi_instruction *, 3272 this->next_temp * 4); 3273 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3274 int level = 0; 3275 3276 foreach_iter(exec_list_iterator, iter, this->instructions) { 3277 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3278 3279 assert(inst->dst.file != PROGRAM_TEMPORARY 3280 || inst->dst.index < this->next_temp); 3281 3282 /* First, do any copy propagation possible into the src regs. */ 3283 for (int r = 0; r < 3; r++) { 3284 glsl_to_tgsi_instruction *first = NULL; 3285 bool good = true; 3286 int acp_base = inst->src[r].index * 4; 3287 3288 if (inst->src[r].file != PROGRAM_TEMPORARY || 3289 inst->src[r].reladdr) 3290 continue; 3291 3292 /* See if we can find entries in the ACP consisting of MOVs 3293 * from the same src register for all the swizzled channels 3294 * of this src register reference. 3295 */ 3296 for (int i = 0; i < 4; i++) { 3297 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3298 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 3299 3300 if (!copy_chan) { 3301 good = false; 3302 break; 3303 } 3304 3305 assert(acp_level[acp_base + src_chan] <= level); 3306 3307 if (!first) { 3308 first = copy_chan; 3309 } else { 3310 if (first->src[0].file != copy_chan->src[0].file || 3311 first->src[0].index != copy_chan->src[0].index) { 3312 good = false; 3313 break; 3314 } 3315 } 3316 } 3317 3318 if (good) { 3319 /* We've now validated that we can copy-propagate to 3320 * replace this src register reference. Do it. 
3321 */ 3322 inst->src[r].file = first->src[0].file; 3323 inst->src[r].index = first->src[0].index; 3324 3325 int swizzle = 0; 3326 for (int i = 0; i < 4; i++) { 3327 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3328 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 3329 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 3330 (3 * i)); 3331 } 3332 inst->src[r].swizzle = swizzle; 3333 } 3334 } 3335 3336 switch (inst->op) { 3337 case TGSI_OPCODE_BGNLOOP: 3338 case TGSI_OPCODE_ENDLOOP: 3339 /* End of a basic block, clear the ACP entirely. */ 3340 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3341 break; 3342 3343 case TGSI_OPCODE_IF: 3344 ++level; 3345 break; 3346 3347 case TGSI_OPCODE_ENDIF: 3348 case TGSI_OPCODE_ELSE: 3349 /* Clear all channels written inside the block from the ACP, but 3350 * leaving those that were not touched. 3351 */ 3352 for (int r = 0; r < this->next_temp; r++) { 3353 for (int c = 0; c < 4; c++) { 3354 if (!acp[4 * r + c]) 3355 continue; 3356 3357 if (acp_level[4 * r + c] >= level) 3358 acp[4 * r + c] = NULL; 3359 } 3360 } 3361 if (inst->op == TGSI_OPCODE_ENDIF) 3362 --level; 3363 break; 3364 3365 default: 3366 /* Continuing the block, clear any written channels from 3367 * the ACP. 3368 */ 3369 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 3370 /* Any temporary might be written, so no copy propagation 3371 * across this instruction. 3372 */ 3373 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3374 } else if (inst->dst.file == PROGRAM_OUTPUT && 3375 inst->dst.reladdr) { 3376 /* Any output might be written, so no copy propagation 3377 * from outputs across this instruction. 
3378 */ 3379 for (int r = 0; r < this->next_temp; r++) { 3380 for (int c = 0; c < 4; c++) { 3381 if (!acp[4 * r + c]) 3382 continue; 3383 3384 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 3385 acp[4 * r + c] = NULL; 3386 } 3387 } 3388 } else if (inst->dst.file == PROGRAM_TEMPORARY || 3389 inst->dst.file == PROGRAM_OUTPUT) { 3390 /* Clear where it's used as dst. */ 3391 if (inst->dst.file == PROGRAM_TEMPORARY) { 3392 for (int c = 0; c < 4; c++) { 3393 if (inst->dst.writemask & (1 << c)) { 3394 acp[4 * inst->dst.index + c] = NULL; 3395 } 3396 } 3397 } 3398 3399 /* Clear where it's used as src. */ 3400 for (int r = 0; r < this->next_temp; r++) { 3401 for (int c = 0; c < 4; c++) { 3402 if (!acp[4 * r + c]) 3403 continue; 3404 3405 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 3406 3407 if (acp[4 * r + c]->src[0].file == inst->dst.file && 3408 acp[4 * r + c]->src[0].index == inst->dst.index && 3409 inst->dst.writemask & (1 << src_chan)) 3410 { 3411 acp[4 * r + c] = NULL; 3412 } 3413 } 3414 } 3415 } 3416 break; 3417 } 3418 3419 /* If this is a copy, add it to the ACP. */ 3420 if (inst->op == TGSI_OPCODE_MOV && 3421 inst->dst.file == PROGRAM_TEMPORARY && 3422 !inst->dst.reladdr && 3423 !inst->saturate && 3424 !inst->src[0].reladdr && 3425 !inst->src[0].negate) { 3426 for (int i = 0; i < 4; i++) { 3427 if (inst->dst.writemask & (1 << i)) { 3428 acp[4 * inst->dst.index + i] = inst; 3429 acp_level[4 * inst->dst.index + i] = level; 3430 } 3431 } 3432 } 3433 } 3434 3435 ralloc_free(acp_level); 3436 ralloc_free(acp); 3437} 3438 3439/* 3440 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. 3441 * 3442 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3443 * will occur. 
As an example, a TXP production after copy propagation but 3444 * before this pass: 3445 * 3446 * 0: MOV TEMP[1], INPUT[4].xyyy; 3447 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3448 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3449 * 3450 * and after this pass: 3451 * 3452 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3453 * 3454 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) 3455 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them 3456 */ 3457void 3458glsl_to_tgsi_visitor::eliminate_dead_code(void) 3459{ 3460 int i; 3461 3462 for (i=0; i < this->next_temp; i++) { 3463 int last_read = get_last_temp_read(i); 3464 int j = 0; 3465 3466 foreach_iter(exec_list_iterator, iter, this->instructions) { 3467 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3468 3469 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && 3470 j > last_read) 3471 { 3472 iter.remove(); 3473 delete inst; 3474 } 3475 3476 j++; 3477 } 3478 } 3479} 3480 3481/* 3482 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 3483 * code elimination. This is less primitive than eliminate_dead_code(), as it 3484 * is per-channel and can detect consecutive writes without a read between them 3485 * as dead code. However, there is some dead code that can be eliminated by 3486 * eliminate_dead_code() but not this function - for example, this function 3487 * cannot eliminate an instruction writing to a register that is never read and 3488 * is the only instruction writing to that register. 3489 * 3490 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3491 * will occur. 
3492 */ 3493int 3494glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) 3495{ 3496 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, 3497 glsl_to_tgsi_instruction *, 3498 this->next_temp * 4); 3499 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3500 int level = 0; 3501 int removed = 0; 3502 3503 foreach_iter(exec_list_iterator, iter, this->instructions) { 3504 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3505 3506 assert(inst->dst.file != PROGRAM_TEMPORARY 3507 || inst->dst.index < this->next_temp); 3508 3509 switch (inst->op) { 3510 case TGSI_OPCODE_BGNLOOP: 3511 case TGSI_OPCODE_ENDLOOP: 3512 case TGSI_OPCODE_CONT: 3513 case TGSI_OPCODE_BRK: 3514 /* End of a basic block, clear the write array entirely. 3515 * 3516 * This keeps us from killing dead code when the writes are 3517 * on either side of a loop, even when the register isn't touched 3518 * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit 3519 * dead code of this type, so it shouldn't make a difference as long as 3520 * the dead code elimination pass in the GLSL compiler does its job. 3521 */ 3522 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3523 break; 3524 3525 case TGSI_OPCODE_ENDIF: 3526 case TGSI_OPCODE_ELSE: 3527 /* Promote the recorded level of all channels written inside the 3528 * preceding if or else block to the level above the if/else block. 3529 */ 3530 for (int r = 0; r < this->next_temp; r++) { 3531 for (int c = 0; c < 4; c++) { 3532 if (!writes[4 * r + c]) 3533 continue; 3534 3535 if (write_level[4 * r + c] == level) 3536 write_level[4 * r + c] = level-1; 3537 } 3538 } 3539 3540 if(inst->op == TGSI_OPCODE_ENDIF) 3541 --level; 3542 3543 break; 3544 3545 case TGSI_OPCODE_IF: 3546 ++level; 3547 /* fallthrough to default case to mark the condition as read */ 3548 3549 default: 3550 /* Continuing the block, clear any channels from the write array that 3551 * are read by this instruction. 
3552 */ 3553 for (unsigned i = 0; i < Elements(inst->src); i++) { 3554 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ 3555 /* Any temporary might be read, so no dead code elimination 3556 * across this instruction. 3557 */ 3558 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3559 } else if (inst->src[i].file == PROGRAM_TEMPORARY) { 3560 /* Clear where it's used as src. */ 3561 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); 3562 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); 3563 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); 3564 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); 3565 3566 for (int c = 0; c < 4; c++) { 3567 if (src_chans & (1 << c)) { 3568 writes[4 * inst->src[i].index + c] = NULL; 3569 } 3570 } 3571 } 3572 } 3573 break; 3574 } 3575 3576 /* If this instruction writes to a temporary, add it to the write array. 3577 * If there is already an instruction in the write array for one or more 3578 * of the channels, flag that channel write as dead. 3579 */ 3580 if (inst->dst.file == PROGRAM_TEMPORARY && 3581 !inst->dst.reladdr && 3582 !inst->saturate) { 3583 for (int c = 0; c < 4; c++) { 3584 if (inst->dst.writemask & (1 << c)) { 3585 if (writes[4 * inst->dst.index + c]) { 3586 if (write_level[4 * inst->dst.index + c] < level) 3587 continue; 3588 else 3589 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); 3590 } 3591 writes[4 * inst->dst.index + c] = inst; 3592 write_level[4 * inst->dst.index + c] = level; 3593 } 3594 } 3595 } 3596 } 3597 3598 /* Anything still in the write array at this point is dead code. */ 3599 for (int r = 0; r < this->next_temp; r++) { 3600 for (int c = 0; c < 4; c++) { 3601 glsl_to_tgsi_instruction *inst = writes[4 * r + c]; 3602 if (inst) 3603 inst->dead_mask |= (1 << c); 3604 } 3605 } 3606 3607 /* Now actually remove the instructions that are completely dead and update 3608 * the writemask of other instructions with dead channels. 
3609 */ 3610 foreach_iter(exec_list_iterator, iter, this->instructions) { 3611 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3612 3613 if (!inst->dead_mask || !inst->dst.writemask) 3614 continue; 3615 else if ((inst->dst.writemask & ~inst->dead_mask) == 0) { 3616 iter.remove(); 3617 delete inst; 3618 removed++; 3619 } else 3620 inst->dst.writemask &= ~(inst->dead_mask); 3621 } 3622 3623 ralloc_free(write_level); 3624 ralloc_free(writes); 3625 3626 return removed; 3627} 3628 3629/* Merges temporary registers together where possible to reduce the number of 3630 * registers needed to run a program. 3631 * 3632 * Produces optimal code only after copy propagation and dead code elimination 3633 * have been run. */ 3634void 3635glsl_to_tgsi_visitor::merge_registers(void) 3636{ 3637 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); 3638 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); 3639 int i, j; 3640 3641 /* Read the indices of the last read and first write to each temp register 3642 * into an array so that we don't have to traverse the instruction list as 3643 * much. */ 3644 for (i=0; i < this->next_temp; i++) { 3645 last_reads[i] = get_last_temp_read(i); 3646 first_writes[i] = get_first_temp_write(i); 3647 } 3648 3649 /* Start looking for registers with non-overlapping usages that can be 3650 * merged together. */ 3651 for (i=0; i < this->next_temp; i++) { 3652 /* Don't touch unused registers. */ 3653 if (last_reads[i] < 0 || first_writes[i] < 0) continue; 3654 3655 for (j=0; j < this->next_temp; j++) { 3656 /* Don't touch unused registers. */ 3657 if (last_reads[j] < 0 || first_writes[j] < 0) continue; 3658 3659 /* We can merge the two registers if the first write to j is after or 3660 * in the same instruction as the last read from i. Note that the 3661 * register at index i will always be used earlier or at the same time 3662 * as the register at index j. 
*/ 3663 if (first_writes[i] <= first_writes[j] && 3664 last_reads[i] <= first_writes[j]) 3665 { 3666 rename_temp_register(j, i); /* Replace all references to j with i.*/ 3667 3668 /* Update the first_writes and last_reads arrays with the new 3669 * values for the merged register index, and mark the newly unused 3670 * register index as such. */ 3671 last_reads[i] = last_reads[j]; 3672 first_writes[j] = -1; 3673 last_reads[j] = -1; 3674 } 3675 } 3676 } 3677 3678 ralloc_free(last_reads); 3679 ralloc_free(first_writes); 3680} 3681 3682/* Reassign indices to temporary registers by reusing unused indices created 3683 * by optimization passes. */ 3684void 3685glsl_to_tgsi_visitor::renumber_registers(void) 3686{ 3687 int i = 0; 3688 int new_index = 0; 3689 3690 for (i=0; i < this->next_temp; i++) { 3691 if (get_first_temp_read(i) < 0) continue; 3692 if (i != new_index) 3693 rename_temp_register(i, new_index); 3694 new_index++; 3695 } 3696 3697 this->next_temp = new_index; 3698} 3699 3700/** 3701 * Returns a fragment program which implements the current pixel transfer ops. 3702 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. 3703 */ 3704extern "C" void 3705get_pixel_transfer_visitor(struct st_fragment_program *fp, 3706 glsl_to_tgsi_visitor *original, 3707 int scale_and_bias, int pixel_maps) 3708{ 3709 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3710 struct st_context *st = st_context(original->ctx); 3711 struct gl_program *prog = &fp->Base.Base; 3712 struct gl_program_parameter_list *params = _mesa_new_parameter_list(); 3713 st_src_reg coord, src0; 3714 st_dst_reg dst0; 3715 glsl_to_tgsi_instruction *inst; 3716 3717 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. 
*/ 3718 v->ctx = original->ctx; 3719 v->prog = prog; 3720 v->shader_program = NULL; 3721 v->glsl_version = original->glsl_version; 3722 v->native_integers = original->native_integers; 3723 v->options = original->options; 3724 v->next_temp = original->next_temp; 3725 v->num_address_regs = original->num_address_regs; 3726 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3727 v->indirect_addr_temps = original->indirect_addr_temps; 3728 v->indirect_addr_consts = original->indirect_addr_consts; 3729 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3730 3731 /* 3732 * Get initial pixel color from the texture. 3733 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; 3734 */ 3735 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3736 src0 = v->get_temp(glsl_type::vec4_type); 3737 dst0 = st_dst_reg(src0); 3738 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3739 inst->sampler = 0; 3740 inst->tex_target = TEXTURE_2D_INDEX; 3741 3742 prog->InputsRead |= FRAG_BIT_TEX0; 3743 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ 3744 v->samplers_used |= (1 << 0); 3745 3746 if (scale_and_bias) { 3747 static const gl_state_index scale_state[STATE_LENGTH] = 3748 { STATE_INTERNAL, STATE_PT_SCALE, 3749 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3750 static const gl_state_index bias_state[STATE_LENGTH] = 3751 { STATE_INTERNAL, STATE_PT_BIAS, 3752 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3753 GLint scale_p, bias_p; 3754 st_src_reg scale, bias; 3755 3756 scale_p = _mesa_add_state_reference(params, scale_state); 3757 bias_p = _mesa_add_state_reference(params, bias_state); 3758 3759 /* MAD colorTemp, colorTemp, scale, bias; */ 3760 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); 3761 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); 3762 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); 3763 } 3764 3765 if (pixel_maps) { 3766 
st_src_reg temp = v->get_temp(glsl_type::vec4_type); 3767 st_dst_reg temp_dst = st_dst_reg(temp); 3768 3769 assert(st->pixel_xfer.pixelmap_texture); 3770 3771 /* With a little effort, we can do four pixel map look-ups with 3772 * two TEX instructions: 3773 */ 3774 3775 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ 3776 temp_dst.writemask = WRITEMASK_XY; /* write R,G */ 3777 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3778 inst->sampler = 1; 3779 inst->tex_target = TEXTURE_2D_INDEX; 3780 3781 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ 3782 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); 3783 temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ 3784 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3785 inst->sampler = 1; 3786 inst->tex_target = TEXTURE_2D_INDEX; 3787 3788 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ 3789 v->samplers_used |= (1 << 1); 3790 3791 /* MOV colorTemp, temp; */ 3792 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); 3793 } 3794 3795 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3796 * new visitor. 
*/ 3797 foreach_iter(exec_list_iterator, iter, original->instructions) { 3798 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3799 glsl_to_tgsi_instruction *newinst; 3800 st_src_reg src_regs[3]; 3801 3802 if (inst->dst.file == PROGRAM_OUTPUT) 3803 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3804 3805 for (int i=0; i<3; i++) { 3806 src_regs[i] = inst->src[i]; 3807 if (src_regs[i].file == PROGRAM_INPUT && 3808 src_regs[i].index == FRAG_ATTRIB_COL0) 3809 { 3810 src_regs[i].file = PROGRAM_TEMPORARY; 3811 src_regs[i].index = src0.index; 3812 } 3813 else if (src_regs[i].file == PROGRAM_INPUT) 3814 prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); 3815 } 3816 3817 newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3818 newinst->tex_target = inst->tex_target; 3819 } 3820 3821 /* Make modifications to fragment program info. */ 3822 prog->Parameters = _mesa_combine_parameter_lists(params, 3823 original->prog->Parameters); 3824 _mesa_free_parameter_list(params); 3825 count_resources(v, prog); 3826 fp->glsl_to_tgsi = v; 3827} 3828 3829/** 3830 * Make fragment program for glBitmap: 3831 * Sample the texture and kill the fragment if the bit is 0. 3832 * This program will be combined with the user's fragment program. 3833 * 3834 * Based on make_bitmap_fragment_program in st_cb_bitmap.c. 3835 */ 3836extern "C" void 3837get_bitmap_visitor(struct st_fragment_program *fp, 3838 glsl_to_tgsi_visitor *original, int samplerIndex) 3839{ 3840 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3841 struct st_context *st = st_context(original->ctx); 3842 struct gl_program *prog = &fp->Base.Base; 3843 st_src_reg coord, src0; 3844 st_dst_reg dst0; 3845 glsl_to_tgsi_instruction *inst; 3846 3847 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. 
*/ 3848 v->ctx = original->ctx; 3849 v->prog = prog; 3850 v->shader_program = NULL; 3851 v->glsl_version = original->glsl_version; 3852 v->native_integers = original->native_integers; 3853 v->options = original->options; 3854 v->next_temp = original->next_temp; 3855 v->num_address_regs = original->num_address_regs; 3856 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3857 v->indirect_addr_temps = original->indirect_addr_temps; 3858 v->indirect_addr_consts = original->indirect_addr_consts; 3859 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3860 3861 /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ 3862 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3863 src0 = v->get_temp(glsl_type::vec4_type); 3864 dst0 = st_dst_reg(src0); 3865 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3866 inst->sampler = samplerIndex; 3867 inst->tex_target = TEXTURE_2D_INDEX; 3868 3869 prog->InputsRead |= FRAG_BIT_TEX0; 3870 prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ 3871 v->samplers_used |= (1 << samplerIndex); 3872 3873 /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ 3874 src0.negate = NEGATE_XYZW; 3875 if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) 3876 src0.swizzle = SWIZZLE_XXXX; 3877 inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); 3878 3879 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3880 * new visitor. 
*/ 3881 foreach_iter(exec_list_iterator, iter, original->instructions) { 3882 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3883 glsl_to_tgsi_instruction *newinst; 3884 st_src_reg src_regs[3]; 3885 3886 if (inst->dst.file == PROGRAM_OUTPUT) 3887 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3888 3889 for (int i=0; i<3; i++) { 3890 src_regs[i] = inst->src[i]; 3891 if (src_regs[i].file == PROGRAM_INPUT) 3892 prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); 3893 } 3894 3895 newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3896 newinst->tex_target = inst->tex_target; 3897 } 3898 3899 /* Make modifications to fragment program info. */ 3900 prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); 3901 count_resources(v, prog); 3902 fp->glsl_to_tgsi = v; 3903} 3904 3905/* ------------------------- TGSI conversion stuff -------------------------- */ 3906struct label { 3907 unsigned branch_target; 3908 unsigned token; 3909}; 3910 3911/** 3912 * Intermediate state used during shader translation. 
 */
struct st_translate {
   struct ureg_program *ureg;   /**< TGSI program builder being emitted into */

   /* Register tables indexed by the glsl_to_tgsi register index (inputs and
    * outputs go through inputMapping/outputMapping first).  Entries of
    * temps[] that are still undefined are declared on first use.
    */
   struct ureg_dst temps[MAX_TEMPS];
   struct ureg_src *constants;
   struct ureg_src *immediates;
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   struct ureg_dst address[1];
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];

   /* Extra info for handling point size clamping in vertex shader */
   struct ureg_dst pointSizeResult; /**< Actual point size output register */
   struct ureg_src pointSizeConst;  /**< Point size range constant register */
   GLint pointSizeOutIndex;         /**< Temp point size output register */
   GLboolean prevInstWrotePointSize;

   const GLuint *inputMapping;   /**< input register index -> slot in inputs[] */
   const GLuint *outputMapping;  /**< output register index -> slot in outputs[] */

   /* For every instruction that contains a label (eg CALL), keep
    * details so that we can go back afterwards and emit the correct
    * tgsi instruction number for each label.
    */
   struct label *labels;    /**< array grown with realloc() */
   unsigned labels_size;    /**< number of entries allocated in labels */
   unsigned labels_count;   /**< number of entries in use */

   /* Keep a record of the tgsi instruction number that each mesa
    * instruction starts at, will be used to fix up labels after
    * translation.
    */
   unsigned *insn;          /**< array grown with realloc() */
   unsigned insn_size;      /**< number of entries allocated in insn */
   unsigned insn_count;     /**< number of entries in use */

   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */

   boolean error;      /**< set to TRUE when growing labels or insn fails */
};

/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
   TGSI_SEMANTIC_FACE,
   TGSI_SEMANTIC_VERTEXID,
   TGSI_SEMANTIC_INSTANCEID
};

/**
 * Make note of a branch to a label in the TGSI code.
 * After we've emitted all instructions, we'll go over the list
 * of labels built here and patch the TGSI code with the actual
 * location of each label.
3968 */ 3969static unsigned *get_label(struct st_translate *t, unsigned branch_target) 3970{ 3971 unsigned i; 3972 3973 if (t->labels_count + 1 >= t->labels_size) { 3974 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 3975 t->labels = (struct label *)realloc(t->labels, 3976 t->labels_size * sizeof(struct label)); 3977 if (t->labels == NULL) { 3978 static unsigned dummy; 3979 t->error = TRUE; 3980 return &dummy; 3981 } 3982 } 3983 3984 i = t->labels_count++; 3985 t->labels[i].branch_target = branch_target; 3986 return &t->labels[i].token; 3987} 3988 3989/** 3990 * Called prior to emitting the TGSI code for each instruction. 3991 * Allocate additional space for instructions if needed. 3992 * Update the insn[] array so the next glsl_to_tgsi_instruction points to 3993 * the next TGSI instruction. 3994 */ 3995static void set_insn_start(struct st_translate *t, unsigned start) 3996{ 3997 if (t->insn_count + 1 >= t->insn_size) { 3998 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 3999 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); 4000 if (t->insn == NULL) { 4001 t->error = TRUE; 4002 return; 4003 } 4004 } 4005 4006 t->insn[t->insn_count++] = start; 4007} 4008 4009/** 4010 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 4011 */ 4012static struct ureg_src 4013emit_immediate(struct st_translate *t, 4014 gl_constant_value values[4], 4015 int type, int size) 4016{ 4017 struct ureg_program *ureg = t->ureg; 4018 4019 switch(type) 4020 { 4021 case GL_FLOAT: 4022 return ureg_DECL_immediate(ureg, &values[0].f, size); 4023 case GL_INT: 4024 return ureg_DECL_immediate_int(ureg, &values[0].i, size); 4025 case GL_UNSIGNED_INT: 4026 case GL_BOOL: 4027 return ureg_DECL_immediate_uint(ureg, &values[0].u, size); 4028 default: 4029 assert(!"should not get here - type must be float, int, uint, or bool"); 4030 return ureg_src_undef(); 4031 } 4032} 4033 4034/** 4035 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. 
4036 */ 4037static struct ureg_dst 4038dst_register(struct st_translate *t, 4039 gl_register_file file, 4040 GLuint index) 4041{ 4042 switch(file) { 4043 case PROGRAM_UNDEFINED: 4044 return ureg_dst_undef(); 4045 4046 case PROGRAM_TEMPORARY: 4047 if (ureg_dst_is_undef(t->temps[index])) 4048 t->temps[index] = ureg_DECL_temporary(t->ureg); 4049 4050 return t->temps[index]; 4051 4052 case PROGRAM_OUTPUT: 4053 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) 4054 t->prevInstWrotePointSize = GL_TRUE; 4055 4056 if (t->procType == TGSI_PROCESSOR_VERTEX) 4057 assert(index < VERT_RESULT_MAX); 4058 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 4059 assert(index < FRAG_RESULT_MAX); 4060 else 4061 assert(index < GEOM_RESULT_MAX); 4062 4063 assert(t->outputMapping[index] < Elements(t->outputs)); 4064 4065 return t->outputs[t->outputMapping[index]]; 4066 4067 case PROGRAM_ADDRESS: 4068 return t->address[index]; 4069 4070 default: 4071 assert(!"unknown dst register file"); 4072 return ureg_dst_undef(); 4073 } 4074} 4075 4076/** 4077 * Map a glsl_to_tgsi src register to a TGSI ureg_src register. 
4078 */ 4079static struct ureg_src 4080src_register(struct st_translate *t, 4081 gl_register_file file, 4082 GLuint index) 4083{ 4084 switch(file) { 4085 case PROGRAM_UNDEFINED: 4086 return ureg_src_undef(); 4087 4088 case PROGRAM_TEMPORARY: 4089 assert(index >= 0); 4090 assert(index < Elements(t->temps)); 4091 if (ureg_dst_is_undef(t->temps[index])) 4092 t->temps[index] = ureg_DECL_temporary(t->ureg); 4093 return ureg_src(t->temps[index]); 4094 4095 case PROGRAM_NAMED_PARAM: 4096 case PROGRAM_ENV_PARAM: 4097 case PROGRAM_LOCAL_PARAM: 4098 case PROGRAM_UNIFORM: 4099 assert(index >= 0); 4100 return t->constants[index]; 4101 case PROGRAM_STATE_VAR: 4102 case PROGRAM_CONSTANT: /* ie, immediate */ 4103 if (index < 0) 4104 return ureg_DECL_constant(t->ureg, 0); 4105 else 4106 return t->constants[index]; 4107 4108 case PROGRAM_IMMEDIATE: 4109 return t->immediates[index]; 4110 4111 case PROGRAM_INPUT: 4112 assert(t->inputMapping[index] < Elements(t->inputs)); 4113 return t->inputs[t->inputMapping[index]]; 4114 4115 case PROGRAM_OUTPUT: 4116 assert(t->outputMapping[index] < Elements(t->outputs)); 4117 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 4118 4119 case PROGRAM_ADDRESS: 4120 return ureg_src(t->address[index]); 4121 4122 case PROGRAM_SYSTEM_VALUE: 4123 assert(index < Elements(t->systemValues)); 4124 return t->systemValues[index]; 4125 4126 default: 4127 assert(!"unknown src register file"); 4128 return ureg_src_undef(); 4129 } 4130} 4131 4132/** 4133 * Create a TGSI ureg_dst register from an st_dst_reg. 
4134 */ 4135static struct ureg_dst 4136translate_dst(struct st_translate *t, 4137 const st_dst_reg *dst_reg, 4138 bool saturate) 4139{ 4140 struct ureg_dst dst = dst_register(t, 4141 dst_reg->file, 4142 dst_reg->index); 4143 4144 dst = ureg_writemask(dst, dst_reg->writemask); 4145 4146 if (saturate) 4147 dst = ureg_saturate(dst); 4148 4149 if (dst_reg->reladdr != NULL) 4150 dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); 4151 4152 return dst; 4153} 4154 4155/** 4156 * Create a TGSI ureg_src register from an st_src_reg. 4157 */ 4158static struct ureg_src 4159translate_src(struct st_translate *t, const st_src_reg *src_reg) 4160{ 4161 struct ureg_src src = src_register(t, src_reg->file, src_reg->index); 4162 4163 src = ureg_swizzle(src, 4164 GET_SWZ(src_reg->swizzle, 0) & 0x3, 4165 GET_SWZ(src_reg->swizzle, 1) & 0x3, 4166 GET_SWZ(src_reg->swizzle, 2) & 0x3, 4167 GET_SWZ(src_reg->swizzle, 3) & 0x3); 4168 4169 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 4170 src = ureg_negate(src); 4171 4172 if (src_reg->reladdr != NULL) { 4173 /* Normally ureg_src_indirect() would be used here, but a stupid compiler 4174 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 4175 * set the bit for src.Negate. So we have to do the operation manually 4176 * here to work around the compiler's problems. */ 4177 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ 4178 struct ureg_src addr = ureg_src(t->address[0]); 4179 src.Indirect = 1; 4180 src.IndirectFile = addr.File; 4181 src.IndirectIndex = addr.Index; 4182 src.IndirectSwizzle = addr.SwizzleX; 4183 4184 if (src_reg->file != PROGRAM_INPUT && 4185 src_reg->file != PROGRAM_OUTPUT) { 4186 /* If src_reg->index was negative, it was set to zero in 4187 * src_register(). Reassign it now. But don't do this 4188 * for input/output regs since they get remapped while 4189 * const buffers don't. 
4190 */ 4191 src.Index = src_reg->index; 4192 } 4193 } 4194 4195 return src; 4196} 4197 4198static struct tgsi_texture_offset 4199translate_tex_offset(struct st_translate *t, 4200 const struct tgsi_texture_offset *in_offset) 4201{ 4202 struct tgsi_texture_offset offset; 4203 4204 assert(in_offset->File == PROGRAM_IMMEDIATE); 4205 4206 offset.File = TGSI_FILE_IMMEDIATE; 4207 offset.Index = in_offset->Index; 4208 offset.SwizzleX = in_offset->SwizzleX; 4209 offset.SwizzleY = in_offset->SwizzleY; 4210 offset.SwizzleZ = in_offset->SwizzleZ; 4211 4212 return offset; 4213} 4214 4215static void 4216compile_tgsi_instruction(struct st_translate *t, 4217 const glsl_to_tgsi_instruction *inst) 4218{ 4219 struct ureg_program *ureg = t->ureg; 4220 GLuint i; 4221 struct ureg_dst dst[1]; 4222 struct ureg_src src[4]; 4223 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; 4224 4225 unsigned num_dst; 4226 unsigned num_src; 4227 4228 num_dst = num_inst_dst_regs(inst->op); 4229 num_src = num_inst_src_regs(inst->op); 4230 4231 if (num_dst) 4232 dst[0] = translate_dst(t, 4233 &inst->dst, 4234 inst->saturate); 4235 4236 for (i = 0; i < num_src; i++) 4237 src[i] = translate_src(t, &inst->src[i]); 4238 4239 switch(inst->op) { 4240 case TGSI_OPCODE_BGNLOOP: 4241 case TGSI_OPCODE_CAL: 4242 case TGSI_OPCODE_ELSE: 4243 case TGSI_OPCODE_ENDLOOP: 4244 case TGSI_OPCODE_IF: 4245 assert(num_dst == 0); 4246 ureg_label_insn(ureg, 4247 inst->op, 4248 src, num_src, 4249 get_label(t, 4250 inst->op == TGSI_OPCODE_CAL ? 
inst->function->sig_id : 0)); 4251 return; 4252 4253 case TGSI_OPCODE_TEX: 4254 case TGSI_OPCODE_TXB: 4255 case TGSI_OPCODE_TXD: 4256 case TGSI_OPCODE_TXL: 4257 case TGSI_OPCODE_TXP: 4258 case TGSI_OPCODE_TXQ: 4259 case TGSI_OPCODE_TXF: 4260 src[num_src++] = t->samplers[inst->sampler]; 4261 for (i = 0; i < inst->tex_offset_num_offset; i++) { 4262 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); 4263 } 4264 ureg_tex_insn(ureg, 4265 inst->op, 4266 dst, num_dst, 4267 translate_texture_target(inst->tex_target, inst->tex_shadow), 4268 texoffsets, inst->tex_offset_num_offset, 4269 src, num_src); 4270 return; 4271 4272 case TGSI_OPCODE_SCS: 4273 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); 4274 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); 4275 break; 4276 4277 default: 4278 ureg_insn(ureg, 4279 inst->op, 4280 dst, num_dst, 4281 src, num_src); 4282 break; 4283 } 4284} 4285 4286/** 4287 * Emit the TGSI instructions for inverting and adjusting WPOS. 4288 * This code is unavoidable because it also depends on whether 4289 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 4290 */ 4291static void 4292emit_wpos_adjustment( struct st_translate *t, 4293 const struct gl_program *program, 4294 boolean invert, 4295 GLfloat adjX, GLfloat adjY[2]) 4296{ 4297 struct ureg_program *ureg = t->ureg; 4298 4299 /* Fragment program uses fragment position input. 4300 * Need to replace instances of INPUT[WPOS] with temp T 4301 * where T = INPUT[WPOS] by y is inverted. 4302 */ 4303 static const gl_state_index wposTransformState[STATE_LENGTH] 4304 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 4305 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4306 4307 /* XXX: note we are modifying the incoming shader here! 
Need to 4308 * do this before emitting the constant decls below, or this 4309 * will be missed: 4310 */ 4311 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 4312 wposTransformState); 4313 4314 struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); 4315 struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); 4316 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 4317 4318 /* First, apply the coordinate shift: */ 4319 if (adjX || adjY[0] || adjY[1]) { 4320 if (adjY[0] != adjY[1]) { 4321 /* Adjust the y coordinate by adjY[1] or adjY[0] respectively 4322 * depending on whether inversion is actually going to be applied 4323 * or not, which is determined by testing against the inversion 4324 * state variable used below, which will be either +1 or -1. 4325 */ 4326 struct ureg_dst adj_temp = ureg_DECL_temporary(ureg); 4327 4328 ureg_CMP(ureg, adj_temp, 4329 ureg_scalar(wpostrans, invert ? 2 : 0), 4330 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f), 4331 ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f)); 4332 ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp)); 4333 } else { 4334 ureg_ADD(ureg, wpos_temp, wpos_input, 4335 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f)); 4336 } 4337 wpos_input = ureg_src(wpos_temp); 4338 } else { 4339 /* MOV wpos_temp, input[wpos] 4340 */ 4341 ureg_MOV( ureg, wpos_temp, wpos_input ); 4342 } 4343 4344 /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be 4345 * inversion/identity, or the other way around if we're drawing to an FBO. 
4346 */ 4347 if (invert) { 4348 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 4349 */ 4350 ureg_MAD( ureg, 4351 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 4352 wpos_input, 4353 ureg_scalar(wpostrans, 0), 4354 ureg_scalar(wpostrans, 1)); 4355 } else { 4356 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 4357 */ 4358 ureg_MAD( ureg, 4359 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 4360 wpos_input, 4361 ureg_scalar(wpostrans, 2), 4362 ureg_scalar(wpostrans, 3)); 4363 } 4364 4365 /* Use wpos_temp as position input from here on: 4366 */ 4367 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 4368} 4369 4370 4371/** 4372 * Emit fragment position/ooordinate code. 4373 */ 4374static void 4375emit_wpos(struct st_context *st, 4376 struct st_translate *t, 4377 const struct gl_program *program, 4378 struct ureg_program *ureg) 4379{ 4380 const struct gl_fragment_program *fp = 4381 (const struct gl_fragment_program *) program; 4382 struct pipe_screen *pscreen = st->pipe->screen; 4383 GLfloat adjX = 0.0f; 4384 GLfloat adjY[2] = { 0.0f, 0.0f }; 4385 boolean invert = FALSE; 4386 4387 /* Query the pixel center conventions supported by the pipe driver and set 4388 * adjX, adjY to help out if it cannot handle the requested one internally. 4389 * 4390 * The bias of the y-coordinate depends on whether y-inversion takes place 4391 * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are 4392 * drawing to an FBO (causes additional inversion), and whether the the pipe 4393 * driver origin and the requested origin differ (the latter condition is 4394 * stored in the 'invert' variable). 
4395 * 4396 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): 4397 * 4398 * center shift only: 4399 * i -> h: +0.5 4400 * h -> i: -0.5 4401 * 4402 * inversion only: 4403 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 4404 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 4405 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 4406 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 4407 * 4408 * inversion and center shift: 4409 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 4410 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 4411 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 4412 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 4413 */ 4414 if (fp->OriginUpperLeft) { 4415 /* Fragment shader wants origin in upper-left */ 4416 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 4417 /* the driver supports upper-left origin */ 4418 } 4419 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 4420 /* the driver supports lower-left origin, need to invert Y */ 4421 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4422 invert = TRUE; 4423 } 4424 else 4425 assert(0); 4426 } 4427 else { 4428 /* Fragment shader wants origin in lower-left */ 4429 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 4430 /* the driver supports lower-left origin */ 4431 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4432 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 4433 /* the driver supports upper-left origin, need to invert Y */ 4434 invert = TRUE; 4435 else 4436 assert(0); 4437 } 4438 4439 if (fp->PixelCenterInteger) { 4440 /* Fragment shader wants pixel center integer */ 4441 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4442 /* the driver supports pixel center integer */ 4443 adjY[1] = 1.0f; 4444 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4445 } 4446 else if (pscreen->get_param(pscreen, 
PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4447 /* the driver supports pixel center half integer, need to bias X,Y */ 4448 adjX = -0.5f; 4449 adjY[0] = -0.5f; 4450 adjY[1] = 0.5f; 4451 } 4452 else 4453 assert(0); 4454 } 4455 else { 4456 /* Fragment shader wants pixel center half integer */ 4457 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4458 /* the driver supports pixel center half integer */ 4459 } 4460 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4461 /* the driver supports pixel center integer, need to bias X,Y */ 4462 adjX = adjY[0] = adjY[1] = 0.5f; 4463 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4464 } 4465 else 4466 assert(0); 4467 } 4468 4469 /* we invert after adjustment so that we avoid the MOV to temporary, 4470 * and reuse the adjustment ADD instead */ 4471 emit_wpos_adjustment(t, program, invert, adjX, adjY); 4472} 4473 4474/** 4475 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 4476 * TGSI uses +1 for front, -1 for back. 4477 * This function converts the TGSI value to the GL value. Simply clamping/ 4478 * saturating the value to [0,1] does the job. 
4479 */ 4480static void 4481emit_face_var(struct st_translate *t) 4482{ 4483 struct ureg_program *ureg = t->ureg; 4484 struct ureg_dst face_temp = ureg_DECL_temporary(ureg); 4485 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 4486 4487 /* MOV_SAT face_temp, input[face] */ 4488 face_temp = ureg_saturate(face_temp); 4489 ureg_MOV(ureg, face_temp, face_input); 4490 4491 /* Use face_temp as face input from here on: */ 4492 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 4493} 4494 4495static void 4496emit_edgeflags(struct st_translate *t) 4497{ 4498 struct ureg_program *ureg = t->ureg; 4499 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 4500 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 4501 4502 ureg_MOV(ureg, edge_dst, edge_src); 4503} 4504 4505/** 4506 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 4507 * \param program the program to translate 4508 * \param numInputs number of input registers used 4509 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 4510 * input indexes 4511 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 4512 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 4513 * each input 4514 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 4515 * \param numOutputs number of output registers used 4516 * \param outputMapping maps Mesa fragment program outputs to TGSI 4517 * generic outputs 4518 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 4519 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 4520 * each output 4521 * 4522 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 4523 */ 4524extern "C" enum pipe_error 4525st_translate_program( 4526 struct gl_context *ctx, 4527 uint procType, 4528 struct ureg_program *ureg, 4529 glsl_to_tgsi_visitor *program, 4530 const struct gl_program *proginfo, 4531 GLuint 
numInputs, 4532 const GLuint inputMapping[], 4533 const ubyte inputSemanticName[], 4534 const ubyte inputSemanticIndex[], 4535 const GLuint interpMode[], 4536 GLuint numOutputs, 4537 const GLuint outputMapping[], 4538 const ubyte outputSemanticName[], 4539 const ubyte outputSemanticIndex[], 4540 boolean passthrough_edgeflags) 4541{ 4542 struct st_translate *t; 4543 unsigned i; 4544 enum pipe_error ret = PIPE_OK; 4545 4546 assert(numInputs <= Elements(t->inputs)); 4547 assert(numOutputs <= Elements(t->outputs)); 4548 4549 t = CALLOC_STRUCT(st_translate); 4550 if (!t) { 4551 ret = PIPE_ERROR_OUT_OF_MEMORY; 4552 goto out; 4553 } 4554 4555 memset(t, 0, sizeof *t); 4556 4557 t->procType = procType; 4558 t->inputMapping = inputMapping; 4559 t->outputMapping = outputMapping; 4560 t->ureg = ureg; 4561 t->pointSizeOutIndex = -1; 4562 t->prevInstWrotePointSize = GL_FALSE; 4563 4564 if (program->shader_program) { 4565 for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) { 4566 struct gl_uniform_storage *const storage = 4567 &program->shader_program->UniformStorage[i]; 4568 4569 _mesa_uniform_detach_all_driver_storage(storage); 4570 } 4571 } 4572 4573 /* 4574 * Declare input attributes. 4575 */ 4576 if (procType == TGSI_PROCESSOR_FRAGMENT) { 4577 for (i = 0; i < numInputs; i++) { 4578 t->inputs[i] = ureg_DECL_fs_input(ureg, 4579 inputSemanticName[i], 4580 inputSemanticIndex[i], 4581 interpMode[i]); 4582 } 4583 4584 if (proginfo->InputsRead & FRAG_BIT_WPOS) { 4585 /* Must do this after setting up t->inputs, and before 4586 * emitting constant references, below: 4587 */ 4588 emit_wpos(st_context(ctx), t, proginfo, ureg); 4589 } 4590 4591 if (proginfo->InputsRead & FRAG_BIT_FACE) 4592 emit_face_var(t); 4593 4594 /* 4595 * Declare output attributes. 
4596 */ 4597 for (i = 0; i < numOutputs; i++) { 4598 switch (outputSemanticName[i]) { 4599 case TGSI_SEMANTIC_POSITION: 4600 t->outputs[i] = ureg_DECL_output(ureg, 4601 TGSI_SEMANTIC_POSITION, /* Z/Depth */ 4602 outputSemanticIndex[i]); 4603 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); 4604 break; 4605 case TGSI_SEMANTIC_STENCIL: 4606 t->outputs[i] = ureg_DECL_output(ureg, 4607 TGSI_SEMANTIC_STENCIL, /* Stencil */ 4608 outputSemanticIndex[i]); 4609 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); 4610 break; 4611 case TGSI_SEMANTIC_COLOR: 4612 t->outputs[i] = ureg_DECL_output(ureg, 4613 TGSI_SEMANTIC_COLOR, 4614 outputSemanticIndex[i]); 4615 break; 4616 default: 4617 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); 4618 ret = PIPE_ERROR_BAD_INPUT; 4619 goto out; 4620 } 4621 } 4622 } 4623 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 4624 for (i = 0; i < numInputs; i++) { 4625 t->inputs[i] = ureg_DECL_gs_input(ureg, 4626 i, 4627 inputSemanticName[i], 4628 inputSemanticIndex[i]); 4629 } 4630 4631 for (i = 0; i < numOutputs; i++) { 4632 t->outputs[i] = ureg_DECL_output(ureg, 4633 outputSemanticName[i], 4634 outputSemanticIndex[i]); 4635 } 4636 } 4637 else { 4638 assert(procType == TGSI_PROCESSOR_VERTEX); 4639 4640 for (i = 0; i < numInputs; i++) { 4641 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 4642 } 4643 4644 for (i = 0; i < numOutputs; i++) { 4645 if (outputSemanticName[i] == TGSI_SEMANTIC_CLIPDIST) { 4646 int mask = ((1 << (program->num_clip_distances - 4*outputSemanticIndex[i])) - 1) & TGSI_WRITEMASK_XYZW; 4647 t->outputs[i] = ureg_DECL_output_masked(ureg, 4648 outputSemanticName[i], 4649 outputSemanticIndex[i], 4650 mask); 4651 } else { 4652 t->outputs[i] = ureg_DECL_output(ureg, 4653 outputSemanticName[i], 4654 outputSemanticIndex[i]); 4655 } 4656 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { 4657 /* Writing to the point size result register requires special 4658 * handling to 
implement clamping. 4659 */ 4660 static const gl_state_index pointSizeClampState[STATE_LENGTH] 4661 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4662 /* XXX: note we are modifying the incoming shader here! Need to 4663 * do this before emitting the constant decls below, or this 4664 * will be missed. 4665 */ 4666 unsigned pointSizeClampConst = 4667 _mesa_add_state_reference(proginfo->Parameters, 4668 pointSizeClampState); 4669 struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); 4670 t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); 4671 t->pointSizeResult = t->outputs[i]; 4672 t->pointSizeOutIndex = i; 4673 t->outputs[i] = psizregtemp; 4674 } 4675 } 4676 if (passthrough_edgeflags) 4677 emit_edgeflags(t); 4678 } 4679 4680 /* Declare address register. 4681 */ 4682 if (program->num_address_regs > 0) { 4683 assert(program->num_address_regs == 1); 4684 t->address[0] = ureg_DECL_address(ureg); 4685 } 4686 4687 /* Declare misc input registers 4688 */ 4689 { 4690 GLbitfield sysInputs = proginfo->SystemValuesRead; 4691 unsigned numSys = 0; 4692 for (i = 0; sysInputs; i++) { 4693 if (sysInputs & (1 << i)) { 4694 unsigned semName = mesa_sysval_to_semantic[i]; 4695 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); 4696 numSys++; 4697 sysInputs &= ~(1 << i); 4698 } 4699 } 4700 } 4701 4702 if (program->indirect_addr_temps) { 4703 /* If temps are accessed with indirect addressing, declare temporaries 4704 * in sequential order. Else, we declare them on demand elsewhere. 4705 * (Note: the number of temporaries is equal to program->next_temp) 4706 */ 4707 for (i = 0; i < (unsigned)program->next_temp; i++) { 4708 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 4709 t->temps[i] = ureg_DECL_temporary(t->ureg); 4710 } 4711 } 4712 4713 /* Emit constants and uniforms. 
TGSI uses a single index space for these, 4714 * so we put all the translated regs in t->constants. 4715 */ 4716 if (proginfo->Parameters) { 4717 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); 4718 if (t->constants == NULL) { 4719 ret = PIPE_ERROR_OUT_OF_MEMORY; 4720 goto out; 4721 } 4722 4723 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 4724 switch (proginfo->Parameters->Parameters[i].Type) { 4725 case PROGRAM_ENV_PARAM: 4726 case PROGRAM_LOCAL_PARAM: 4727 case PROGRAM_STATE_VAR: 4728 case PROGRAM_NAMED_PARAM: 4729 case PROGRAM_UNIFORM: 4730 t->constants[i] = ureg_DECL_constant(ureg, i); 4731 break; 4732 4733 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect 4734 * addressing of the const buffer. 4735 * FIXME: Be smarter and recognize param arrays: 4736 * indirect addressing is only valid within the referenced 4737 * array. 4738 */ 4739 case PROGRAM_CONSTANT: 4740 if (program->indirect_addr_consts) 4741 t->constants[i] = ureg_DECL_constant(ureg, i); 4742 else 4743 t->constants[i] = emit_immediate(t, 4744 proginfo->Parameters->ParameterValues[i], 4745 proginfo->Parameters->Parameters[i].DataType, 4746 4); 4747 break; 4748 default: 4749 break; 4750 } 4751 } 4752 } 4753 4754 /* Emit immediate values. 
4755 */ 4756 t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); 4757 if (t->immediates == NULL) { 4758 ret = PIPE_ERROR_OUT_OF_MEMORY; 4759 goto out; 4760 } 4761 i = 0; 4762 foreach_iter(exec_list_iterator, iter, program->immediates) { 4763 immediate_storage *imm = (immediate_storage *)iter.get(); 4764 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); 4765 } 4766 4767 /* texture samplers */ 4768 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 4769 if (program->samplers_used & (1 << i)) { 4770 t->samplers[i] = ureg_DECL_sampler(ureg, i); 4771 } 4772 } 4773 4774 /* Emit each instruction in turn: 4775 */ 4776 foreach_iter(exec_list_iterator, iter, program->instructions) { 4777 set_insn_start(t, ureg_get_instruction_number(ureg)); 4778 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); 4779 4780 if (t->prevInstWrotePointSize && proginfo->Id) { 4781 /* The previous instruction wrote to the (fake) vertex point size 4782 * result register. Now we need to clamp that value to the min/max 4783 * point size range, putting the result into the real point size 4784 * register. 4785 * Note that we can't do this easily at the end of program due to 4786 * possible early return. 
4787 */ 4788 set_insn_start(t, ureg_get_instruction_number(ureg)); 4789 ureg_MAX(t->ureg, 4790 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), 4791 ureg_src(t->outputs[t->pointSizeOutIndex]), 4792 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 4793 ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), 4794 ureg_src(t->outputs[t->pointSizeOutIndex]), 4795 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 4796 } 4797 t->prevInstWrotePointSize = GL_FALSE; 4798 } 4799 4800 /* Fix up all emitted labels: 4801 */ 4802 for (i = 0; i < t->labels_count; i++) { 4803 ureg_fixup_label(ureg, t->labels[i].token, 4804 t->insn[t->labels[i].branch_target]); 4805 } 4806 4807 if (program->shader_program) { 4808 /* This has to be done last. Any operation the can cause 4809 * prog->ParameterValues to get reallocated (e.g., anything that adds a 4810 * program constant) has to happen before creating this linkage. 4811 */ 4812 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4813 if (program->shader_program->_LinkedShaders[i] == NULL) 4814 continue; 4815 4816 _mesa_associate_uniform_storage(ctx, program->shader_program, 4817 program->shader_program->_LinkedShaders[i]->Program->Parameters); 4818 } 4819 } 4820 4821out: 4822 if (t) { 4823 FREE(t->insn); 4824 FREE(t->labels); 4825 FREE(t->constants); 4826 FREE(t->immediates); 4827 4828 if (t->error) { 4829 debug_printf("%s: translate error flag set\n", __FUNCTION__); 4830 } 4831 4832 FREE(t); 4833 } 4834 4835 return ret; 4836} 4837/* ----------------------------- End TGSI code ------------------------------ */ 4838 4839/** 4840 * Convert a shader's GLSL IR into a Mesa gl_program, although without 4841 * generating Mesa IR. 
4842 */ 4843static struct gl_program * 4844get_mesa_program(struct gl_context *ctx, 4845 struct gl_shader_program *shader_program, 4846 struct gl_shader *shader, 4847 int num_clip_distances) 4848{ 4849 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); 4850 struct gl_program *prog; 4851 struct pipe_screen * screen = st_context(ctx)->pipe->screen; 4852 unsigned pipe_shader_type; 4853 GLenum target; 4854 const char *target_string; 4855 bool progress; 4856 struct gl_shader_compiler_options *options = 4857 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; 4858 4859 switch (shader->Type) { 4860 case GL_VERTEX_SHADER: 4861 target = GL_VERTEX_PROGRAM_ARB; 4862 target_string = "vertex"; 4863 pipe_shader_type = PIPE_SHADER_VERTEX; 4864 break; 4865 case GL_FRAGMENT_SHADER: 4866 target = GL_FRAGMENT_PROGRAM_ARB; 4867 target_string = "fragment"; 4868 pipe_shader_type = PIPE_SHADER_FRAGMENT; 4869 break; 4870 case GL_GEOMETRY_SHADER: 4871 target = GL_GEOMETRY_PROGRAM_NV; 4872 target_string = "geometry"; 4873 pipe_shader_type = PIPE_SHADER_GEOMETRY; 4874 break; 4875 default: 4876 assert(!"should not be reached"); 4877 return NULL; 4878 } 4879 4880 validate_ir_tree(shader->ir); 4881 4882 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); 4883 if (!prog) 4884 return NULL; 4885 prog->Parameters = _mesa_new_parameter_list(); 4886 v->ctx = ctx; 4887 v->prog = prog; 4888 v->shader_program = shader_program; 4889 v->options = options; 4890 v->glsl_version = ctx->Const.GLSLVersion; 4891 v->native_integers = ctx->Const.NativeIntegers; 4892 v->num_clip_distances = num_clip_distances; 4893 4894 _mesa_generate_parameters_list_for_uniforms(shader_program, shader, 4895 prog->Parameters); 4896 4897 /* Emit intermediate IR for main(). */ 4898 visit_exec_list(shader->ir, v); 4899 4900 /* Now emit bodies for any functions that were used. 
*/ 4901 do { 4902 progress = GL_FALSE; 4903 4904 foreach_iter(exec_list_iterator, iter, v->function_signatures) { 4905 function_entry *entry = (function_entry *)iter.get(); 4906 4907 if (!entry->bgn_inst) { 4908 v->current_function = entry; 4909 4910 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); 4911 entry->bgn_inst->function = entry; 4912 4913 visit_exec_list(&entry->sig->body, v); 4914 4915 glsl_to_tgsi_instruction *last; 4916 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); 4917 if (last->op != TGSI_OPCODE_RET) 4918 v->emit(NULL, TGSI_OPCODE_RET); 4919 4920 glsl_to_tgsi_instruction *end; 4921 end = v->emit(NULL, TGSI_OPCODE_ENDSUB); 4922 end->function = entry; 4923 4924 progress = GL_TRUE; 4925 } 4926 } 4927 } while (progress); 4928 4929#if 0 4930 /* Print out some information (for debugging purposes) used by the 4931 * optimization passes. */ 4932 for (i=0; i < v->next_temp; i++) { 4933 int fr = v->get_first_temp_read(i); 4934 int fw = v->get_first_temp_write(i); 4935 int lr = v->get_last_temp_read(i); 4936 int lw = v->get_last_temp_write(i); 4937 4938 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); 4939 assert(fw <= fr); 4940 } 4941#endif 4942 4943 if (!screen->get_shader_param(screen, pipe_shader_type, 4944 PIPE_SHADER_CAP_OUTPUT_READ)) { 4945 /* Remove reads to output registers, and to varyings in vertex shaders. */ 4946 v->remove_output_reads(PROGRAM_OUTPUT); 4947 if (target == GL_VERTEX_PROGRAM_ARB) 4948 v->remove_output_reads(PROGRAM_VARYING); 4949 } 4950 4951 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ 4952 v->simplify_cmp(); 4953 v->copy_propagate(); 4954 while (v->eliminate_dead_code_advanced()); 4955 4956 /* FIXME: These passes to optimize temporary registers don't work when there 4957 * is indirect addressing of the temporary register space. We need proper 4958 * array support so that we don't have to give up these passes in every 4959 * shader that uses arrays. 
4960 */ 4961 if (!v->indirect_addr_temps) { 4962 v->eliminate_dead_code(); 4963 v->merge_registers(); 4964 v->renumber_registers(); 4965 } 4966 4967 /* Write the END instruction. */ 4968 v->emit(NULL, TGSI_OPCODE_END); 4969 4970 if (ctx->Shader.Flags & GLSL_DUMP) { 4971 printf("\n"); 4972 printf("GLSL IR for linked %s program %d:\n", target_string, 4973 shader_program->Name); 4974 _mesa_print_ir(shader->ir, NULL); 4975 printf("\n"); 4976 printf("\n"); 4977 fflush(stdout); 4978 } 4979 4980 prog->Instructions = NULL; 4981 prog->NumInstructions = 0; 4982 4983 do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER); 4984 count_resources(v, prog); 4985 4986 _mesa_reference_program(ctx, &shader->Program, prog); 4987 4988 /* This has to be done last. Any operation the can cause 4989 * prog->ParameterValues to get reallocated (e.g., anything that adds a 4990 * program constant) has to happen before creating this linkage. 4991 */ 4992 _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); 4993 if (!shader_program->LinkStatus) { 4994 return NULL; 4995 } 4996 4997 struct st_vertex_program *stvp; 4998 struct st_fragment_program *stfp; 4999 struct st_geometry_program *stgp; 5000 5001 switch (shader->Type) { 5002 case GL_VERTEX_SHADER: 5003 stvp = (struct st_vertex_program *)prog; 5004 stvp->glsl_to_tgsi = v; 5005 break; 5006 case GL_FRAGMENT_SHADER: 5007 stfp = (struct st_fragment_program *)prog; 5008 stfp->glsl_to_tgsi = v; 5009 break; 5010 case GL_GEOMETRY_SHADER: 5011 stgp = (struct st_geometry_program *)prog; 5012 stgp->glsl_to_tgsi = v; 5013 break; 5014 default: 5015 assert(!"should not be reached"); 5016 return NULL; 5017 } 5018 5019 return prog; 5020} 5021 5022/** 5023 * Searches through the IR for a declaration of gl_ClipDistance and returns the 5024 * declared size of the gl_ClipDistance array. Returns 0 if gl_ClipDistance is 5025 * not declared in the IR. 
5026 */ 5027int get_clip_distance_size(exec_list *ir) 5028{ 5029 foreach_iter (exec_list_iterator, iter, *ir) { 5030 ir_instruction *inst = (ir_instruction *)iter.get(); 5031 ir_variable *var = inst->as_variable(); 5032 if (var == NULL) continue; 5033 if (!strcmp(var->name, "gl_ClipDistance")) { 5034 return var->type->length; 5035 } 5036 } 5037 5038 return 0; 5039} 5040 5041extern "C" { 5042 5043struct gl_shader * 5044st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 5045{ 5046 struct gl_shader *shader; 5047 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || 5048 type == GL_GEOMETRY_SHADER_ARB); 5049 shader = rzalloc(NULL, struct gl_shader); 5050 if (shader) { 5051 shader->Type = type; 5052 shader->Name = name; 5053 _mesa_init_shader(ctx, shader); 5054 } 5055 return shader; 5056} 5057 5058struct gl_shader_program * 5059st_new_shader_program(struct gl_context *ctx, GLuint name) 5060{ 5061 struct gl_shader_program *shProg; 5062 shProg = rzalloc(NULL, struct gl_shader_program); 5063 if (shProg) { 5064 shProg->Name = name; 5065 _mesa_init_shader_program(ctx, shProg); 5066 } 5067 return shProg; 5068} 5069 5070/** 5071 * Link a shader. 5072 * Called via ctx->Driver.LinkShader() 5073 * This actually involves converting GLSL IR into an intermediate TGSI-like IR 5074 * with code lowering and other optimizations. 
5075 */ 5076GLboolean 5077st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 5078{ 5079 int num_clip_distances[MESA_SHADER_TYPES]; 5080 assert(prog->LinkStatus); 5081 5082 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 5083 if (prog->_LinkedShaders[i] == NULL) 5084 continue; 5085 5086 bool progress; 5087 exec_list *ir = prog->_LinkedShaders[i]->ir; 5088 const struct gl_shader_compiler_options *options = 5089 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; 5090 5091 /* We have to determine the length of the gl_ClipDistance array before 5092 * the array is lowered to two vec4s by lower_clip_distance(). 5093 */ 5094 num_clip_distances[i] = get_clip_distance_size(ir); 5095 5096 do { 5097 progress = false; 5098 5099 /* Lowering */ 5100 do_mat_op_to_vec(ir); 5101 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 5102 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP 5103 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); 5104 5105 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; 5106 5107 progress = do_common_optimization(ir, true, true, 5108 options->MaxUnrollIterations) 5109 || progress; 5110 5111 progress = lower_quadop_vector(ir, false) || progress; 5112 progress = lower_clip_distance(ir) || progress; 5113 5114 if (options->MaxIfDepth == 0) 5115 progress = lower_discard(ir) || progress; 5116 5117 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; 5118 5119 if (options->EmitNoNoise) 5120 progress = lower_noise(ir) || progress; 5121 5122 /* If there are forms of indirect addressing that the driver 5123 * cannot handle, perform the lowering pass. 
5124 */ 5125 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 5126 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 5127 progress = 5128 lower_variable_index_to_cond_assign(ir, 5129 options->EmitNoIndirectInput, 5130 options->EmitNoIndirectOutput, 5131 options->EmitNoIndirectTemp, 5132 options->EmitNoIndirectUniform) 5133 || progress; 5134 5135 progress = do_vec_index_to_cond_assign(ir) || progress; 5136 } while (progress); 5137 5138 validate_ir_tree(ir); 5139 } 5140 5141 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 5142 struct gl_program *linked_prog; 5143 5144 if (prog->_LinkedShaders[i] == NULL) 5145 continue; 5146 5147 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i], 5148 num_clip_distances[i]); 5149 5150 if (linked_prog) { 5151 static const GLenum targets[] = { 5152 GL_VERTEX_PROGRAM_ARB, 5153 GL_FRAGMENT_PROGRAM_ARB, 5154 GL_GEOMETRY_PROGRAM_NV 5155 }; 5156 5157 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 5158 linked_prog); 5159 if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) { 5160 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 5161 NULL); 5162 _mesa_reference_program(ctx, &linked_prog, NULL); 5163 return GL_FALSE; 5164 } 5165 } 5166 5167 _mesa_reference_program(ctx, &linked_prog, NULL); 5168 } 5169 5170 return GL_TRUE; 5171} 5172 5173void 5174st_translate_stream_output_info(struct glsl_to_tgsi_visitor *glsl_to_tgsi, 5175 const GLuint outputMapping[], 5176 struct pipe_stream_output_info *so) 5177{ 5178 static unsigned comps_to_mask[] = { 5179 0, 5180 TGSI_WRITEMASK_X, 5181 TGSI_WRITEMASK_XY, 5182 TGSI_WRITEMASK_XYZ, 5183 TGSI_WRITEMASK_XYZW 5184 }; 5185 unsigned i; 5186 struct gl_transform_feedback_info *info = 5187 &glsl_to_tgsi->shader_program->LinkedTransformFeedback; 5188 5189 for (i = 0; i < info->NumOutputs; i++) { 5190 assert(info->Outputs[i].NumComponents < Elements(comps_to_mask)); 5191 so->output[i].register_index = 5192 
outputMapping[info->Outputs[i].OutputRegister]; 5193 so->output[i].register_mask = 5194 comps_to_mask[info->Outputs[i].NumComponents] 5195 << info->Outputs[i].ComponentOffset; 5196 so->output[i].output_buffer = info->Outputs[i].OutputBuffer; 5197 } 5198 so->num_outputs = info->NumOutputs; 5199} 5200 5201} /* extern "C" */ 5202