st_glsl_to_tgsi.cpp revision c8fed01c732fa20e1ae035ed5f7b6156a5d6ffe9
1/* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright © 2010 Intel Corporation 5 * Copyright © 2011 Bryan Cain 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 */ 26 27/** 28 * \file glsl_to_tgsi.cpp 29 * 30 * Translate GLSL IR to TGSI. 31 */ 32 33#include <stdio.h> 34#include "main/compiler.h" 35#include "ir.h" 36#include "ir_visitor.h" 37#include "ir_print_visitor.h" 38#include "ir_expression_flattening.h" 39#include "glsl_types.h" 40#include "glsl_parser_extras.h" 41#include "../glsl/program.h" 42#include "ir_optimization.h" 43#include "ast.h" 44 45extern "C" { 46#include "main/mtypes.h" 47#include "main/shaderapi.h" 48#include "main/shaderobj.h" 49#include "main/uniforms.h" 50#include "program/hash_table.h" 51#include "program/prog_instruction.h" 52#include "program/prog_optimize.h" 53#include "program/prog_print.h" 54#include "program/program.h" 55#include "program/prog_uniform.h" 56#include "program/prog_parameter.h" 57#include "program/sampler.h" 58 59#include "pipe/p_compiler.h" 60#include "pipe/p_context.h" 61#include "pipe/p_screen.h" 62#include "pipe/p_shader_tokens.h" 63#include "pipe/p_state.h" 64#include "util/u_math.h" 65#include "tgsi/tgsi_ureg.h" 66#include "tgsi/tgsi_info.h" 67#include "st_context.h" 68#include "st_program.h" 69#include "st_glsl_to_tgsi.h" 70#include "st_mesa_to_tgsi.h" 71} 72 73#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX 74#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 75 (1 << PROGRAM_ENV_PARAM) | \ 76 (1 << PROGRAM_STATE_VAR) | \ 77 (1 << PROGRAM_NAMED_PARAM) | \ 78 (1 << PROGRAM_CONSTANT) | \ 79 (1 << PROGRAM_UNIFORM)) 80 81#define MAX_TEMPS 4096 82 83class st_src_reg; 84class st_dst_reg; 85 86static int swizzle_for_size(int size); 87 88/** 89 * This struct is a corresponding struct to TGSI ureg_src. 90 */ 91class st_src_reg { 92public: 93 st_src_reg(gl_register_file file, int index, const glsl_type *type) 94 { 95 this->file = file; 96 this->index = index; 97 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 98 this->swizzle = swizzle_for_size(type->vector_elements); 99 else 100 this->swizzle = SWIZZLE_XYZW; 101 this->negate = 0; 102 this->type = type ? type->base_type : GLSL_TYPE_ERROR; 103 this->reladdr = NULL; 104 } 105 106 st_src_reg(gl_register_file file, int index, int type) 107 { 108 this->type = type; 109 this->file = file; 110 this->index = index; 111 this->swizzle = SWIZZLE_XYZW; 112 this->negate = 0; 113 this->reladdr = NULL; 114 } 115 116 st_src_reg() 117 { 118 this->type = GLSL_TYPE_ERROR; 119 this->file = PROGRAM_UNDEFINED; 120 this->index = 0; 121 this->swizzle = 0; 122 this->negate = 0; 123 this->reladdr = NULL; 124 } 125 126 explicit st_src_reg(st_dst_reg reg); 127 128 gl_register_file file; /**< PROGRAM_* from Mesa */ 129 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 130 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 131 int negate; /**< NEGATE_XYZW mask from mesa */ 132 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 133 /** Register index should be offset by the integer in this reg. */ 134 st_src_reg *reladdr; 135}; 136 137class st_dst_reg { 138public: 139 st_dst_reg(gl_register_file file, int writemask, int type) 140 { 141 this->file = file; 142 this->index = 0; 143 this->writemask = writemask; 144 this->cond_mask = COND_TR; 145 this->reladdr = NULL; 146 this->type = type; 147 } 148 149 st_dst_reg() 150 { 151 this->type = GLSL_TYPE_ERROR; 152 this->file = PROGRAM_UNDEFINED; 153 this->index = 0; 154 this->writemask = 0; 155 this->cond_mask = COND_TR; 156 this->reladdr = NULL; 157 } 158 159 explicit st_dst_reg(st_src_reg reg); 160 161 gl_register_file file; /**< PROGRAM_* from Mesa */ 162 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 163 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 164 GLuint cond_mask:4; 165 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 166 /** Register index should be offset by the integer in this reg. */ 167 st_src_reg *reladdr; 168}; 169 170st_src_reg::st_src_reg(st_dst_reg reg) 171{ 172 this->type = reg.type; 173 this->file = reg.file; 174 this->index = reg.index; 175 this->swizzle = SWIZZLE_XYZW; 176 this->negate = 0; 177 this->reladdr = reg.reladdr; 178} 179 180st_dst_reg::st_dst_reg(st_src_reg reg) 181{ 182 this->type = reg.type; 183 this->file = reg.file; 184 this->index = reg.index; 185 this->writemask = WRITEMASK_XYZW; 186 this->cond_mask = COND_TR; 187 this->reladdr = reg.reladdr; 188} 189 190class glsl_to_tgsi_instruction : public exec_node { 191public: 192 /* Callers of this ralloc-based new need not call delete. It's 193 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 194 static void* operator new(size_t size, void *ctx) 195 { 196 void *node; 197 198 node = rzalloc_size(ctx, size); 199 assert(node != NULL); 200 201 return node; 202 } 203 204 unsigned op; 205 st_dst_reg dst; 206 st_src_reg src[3]; 207 /** Pointer to the ir source this tree came from for debugging */ 208 ir_instruction *ir; 209 GLboolean cond_update; 210 bool saturate; 211 int sampler; /**< sampler index */ 212 int tex_target; /**< One of TEXTURE_*_INDEX */ 213 GLboolean tex_shadow; 214 int dead_mask; /**< Used in dead code elimination */ 215 216 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ 217}; 218 219class variable_storage : public exec_node { 220public: 221 variable_storage(ir_variable *var, gl_register_file file, int index) 222 : file(file), index(index), var(var) 223 { 224 /* empty */ 225 } 226 227 gl_register_file file; 228 int index; 229 ir_variable *var; /* variable that maps to this, if any */ 230}; 231 232class immediate_storage : public exec_node { 233public: 234 immediate_storage(gl_constant_value *values, int size, int type) 235 { 236 memcpy(this->values, values, size * sizeof(gl_constant_value)); 237 this->size = size; 238 this->type = type; 239 } 240 241 gl_constant_value values[4]; 242 int size; /**< Number of components (1-4) */ 243 int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ 244}; 245 246class function_entry : public exec_node { 247public: 248 ir_function_signature *sig; 249 250 /** 251 * identifier of this function signature used by the program. 252 * 253 * At the point that TGSI instructions for function calls are 254 * generated, we don't know the address of the first instruction of 255 * the function body. So we make the BranchTarget that is called a 256 * small integer and rewrite them during set_branchtargets(). 257 */ 258 int sig_id; 259 260 /** 261 * Pointer to first instruction of the function body. 262 * 263 * Set during function body emits after main() is processed. 264 */ 265 glsl_to_tgsi_instruction *bgn_inst; 266 267 /** 268 * Index of the first instruction of the function body in actual TGSI. 269 * 270 * Set after conversion from glsl_to_tgsi_instruction to TGSI. 271 */ 272 int inst; 273 274 /** Storage for the return value. */ 275 st_src_reg return_reg; 276}; 277 278class glsl_to_tgsi_visitor : public ir_visitor { 279public: 280 glsl_to_tgsi_visitor(); 281 ~glsl_to_tgsi_visitor(); 282 283 function_entry *current_function; 284 285 struct gl_context *ctx; 286 struct gl_program *prog; 287 struct gl_shader_program *shader_program; 288 struct gl_shader_compiler_options *options; 289 290 int next_temp; 291 292 int num_address_regs; 293 int samplers_used; 294 bool indirect_addr_temps; 295 bool indirect_addr_consts; 296 297 int glsl_version; 298 bool native_integers; 299 300 variable_storage *find_variable_storage(ir_variable *var); 301 302 int add_constant(gl_register_file file, gl_constant_value values[4], 303 int size, int datatype, GLuint *swizzle_out); 304 305 function_entry *get_function_signature(ir_function_signature *sig); 306 307 st_src_reg get_temp(const glsl_type *type); 308 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); 309 310 st_src_reg st_src_reg_for_float(float val); 311 st_src_reg st_src_reg_for_int(int val); 312 st_src_reg st_src_reg_for_type(int type, int val); 313 314 /** 315 * \name Visit methods 316 * 317 * As typical for the visitor pattern, there must be one \c visit method for 318 * each concrete subclass of \c ir_instruction. Virtual base classes within 319 * the hierarchy should not have \c visit methods. 320 */ 321 /*@{*/ 322 virtual void visit(ir_variable *); 323 virtual void visit(ir_loop *); 324 virtual void visit(ir_loop_jump *); 325 virtual void visit(ir_function_signature *); 326 virtual void visit(ir_function *); 327 virtual void visit(ir_expression *); 328 virtual void visit(ir_swizzle *); 329 virtual void visit(ir_dereference_variable *); 330 virtual void visit(ir_dereference_array *); 331 virtual void visit(ir_dereference_record *); 332 virtual void visit(ir_assignment *); 333 virtual void visit(ir_constant *); 334 virtual void visit(ir_call *); 335 virtual void visit(ir_return *); 336 virtual void visit(ir_discard *); 337 virtual void visit(ir_texture *); 338 virtual void visit(ir_if *); 339 /*@}*/ 340 341 st_src_reg result; 342 343 /** List of variable_storage */ 344 exec_list variables; 345 346 /** List of immediate_storage */ 347 exec_list immediates; 348 int num_immediates; 349 350 /** List of function_entry */ 351 exec_list function_signatures; 352 int next_signature_id; 353 354 /** List of glsl_to_tgsi_instruction */ 355 exec_list instructions; 356 357 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); 358 359 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 360 st_dst_reg dst, st_src_reg src0); 361 362 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 363 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 364 365 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 366 st_dst_reg dst, 367 st_src_reg src0, st_src_reg src1, st_src_reg src2); 368 369 unsigned get_opcode(ir_instruction *ir, unsigned op, 370 st_dst_reg dst, 371 st_src_reg src0, st_src_reg src1); 372 373 /** 374 * Emit the correct dot-product instruction for the type of arguments 375 */ 376 glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, 377 st_dst_reg dst, 378 st_src_reg src0, 379 st_src_reg src1, 380 unsigned elements); 381 382 void emit_scalar(ir_instruction *ir, unsigned op, 383 st_dst_reg dst, st_src_reg src0); 384 385 void emit_scalar(ir_instruction *ir, unsigned op, 386 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 387 388 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); 389 390 void emit_scs(ir_instruction *ir, unsigned op, 391 st_dst_reg dst, const st_src_reg &src); 392 393 bool try_emit_mad(ir_expression *ir, 394 int mul_operand); 395 bool try_emit_mad_for_and_not(ir_expression *ir, 396 int mul_operand); 397 bool try_emit_sat(ir_expression *ir); 398 399 void emit_swz(ir_expression *ir); 400 401 bool process_move_condition(ir_rvalue *ir); 402 403 void remove_output_reads(gl_register_file type); 404 void simplify_cmp(void); 405 406 void rename_temp_register(int index, int new_index); 407 int get_first_temp_read(int index); 408 int get_first_temp_write(int index); 409 int get_last_temp_read(int index); 410 int get_last_temp_write(int index); 411 412 void copy_propagate(void); 413 void eliminate_dead_code(void); 414 int eliminate_dead_code_advanced(void); 415 void merge_registers(void); 416 void renumber_registers(void); 417 418 void *mem_ctx; 419}; 420 421static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); 422 423static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); 424 425static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); 426 427static void 428fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); 429 430static void 431fail_link(struct gl_shader_program *prog, const char *fmt, ...) 432{ 433 va_list args; 434 va_start(args, fmt); 435 ralloc_vasprintf_append(&prog->InfoLog, fmt, args); 436 va_end(args); 437 438 prog->LinkStatus = GL_FALSE; 439} 440 441static int 442swizzle_for_size(int size) 443{ 444 int size_swizzles[4] = { 445 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 446 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 447 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 448 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 449 }; 450 451 assert((size >= 1) && (size <= 4)); 452 return size_swizzles[size - 1]; 453} 454 455static bool 456is_tex_instruction(unsigned opcode) 457{ 458 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 459 return info->is_tex; 460} 461 462static unsigned 463num_inst_dst_regs(unsigned opcode) 464{ 465 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 466 return info->num_dst; 467} 468 469static unsigned 470num_inst_src_regs(unsigned opcode) 471{ 472 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 473 return info->is_tex ? info->num_src - 1 : info->num_src; 474} 475 476glsl_to_tgsi_instruction * 477glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 478 st_dst_reg dst, 479 st_src_reg src0, st_src_reg src1, st_src_reg src2) 480{ 481 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); 482 int num_reladdr = 0, i; 483 484 op = get_opcode(ir, op, dst, src0, src1); 485 486 /* If we have to do relative addressing, we want to load the ARL 487 * reg directly for one of the regs, and preload the other reladdr 488 * sources into temps. 489 */ 490 num_reladdr += dst.reladdr != NULL; 491 num_reladdr += src0.reladdr != NULL; 492 num_reladdr += src1.reladdr != NULL; 493 num_reladdr += src2.reladdr != NULL; 494 495 reladdr_to_temp(ir, &src2, &num_reladdr); 496 reladdr_to_temp(ir, &src1, &num_reladdr); 497 reladdr_to_temp(ir, &src0, &num_reladdr); 498 499 if (dst.reladdr) { 500 emit_arl(ir, address_reg, *dst.reladdr); 501 num_reladdr--; 502 } 503 assert(num_reladdr == 0); 504 505 inst->op = op; 506 inst->dst = dst; 507 inst->src[0] = src0; 508 inst->src[1] = src1; 509 inst->src[2] = src2; 510 inst->ir = ir; 511 inst->dead_mask = 0; 512 513 inst->function = NULL; 514 515 if (op == TGSI_OPCODE_ARL) 516 this->num_address_regs = 1; 517 518 /* Update indirect addressing status used by TGSI */ 519 if (dst.reladdr) { 520 switch(dst.file) { 521 case PROGRAM_TEMPORARY: 522 this->indirect_addr_temps = true; 523 break; 524 case PROGRAM_LOCAL_PARAM: 525 case PROGRAM_ENV_PARAM: 526 case PROGRAM_STATE_VAR: 527 case PROGRAM_NAMED_PARAM: 528 case PROGRAM_CONSTANT: 529 case PROGRAM_UNIFORM: 530 this->indirect_addr_consts = true; 531 break; 532 case PROGRAM_IMMEDIATE: 533 assert(!"immediates should not have indirect addressing"); 534 break; 535 default: 536 break; 537 } 538 } 539 else { 540 for (i=0; i<3; i++) { 541 if(inst->src[i].reladdr) { 542 switch(inst->src[i].file) { 543 case PROGRAM_TEMPORARY: 544 this->indirect_addr_temps = true; 545 break; 546 case PROGRAM_LOCAL_PARAM: 547 case PROGRAM_ENV_PARAM: 548 case PROGRAM_STATE_VAR: 549 case PROGRAM_NAMED_PARAM: 550 case PROGRAM_CONSTANT: 551 case PROGRAM_UNIFORM: 552 this->indirect_addr_consts = true; 553 break; 554 case PROGRAM_IMMEDIATE: 555 assert(!"immediates should not have indirect addressing"); 556 break; 557 default: 558 break; 559 } 560 } 561 } 562 } 563 564 this->instructions.push_tail(inst); 565 566 return inst; 567} 568 569 570glsl_to_tgsi_instruction * 571glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 572 st_dst_reg dst, st_src_reg src0, st_src_reg src1) 573{ 574 return emit(ir, op, dst, src0, src1, undef_src); 575} 576 577glsl_to_tgsi_instruction * 578glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 579 st_dst_reg dst, st_src_reg src0) 580{ 581 assert(dst.writemask != 0); 582 return emit(ir, op, dst, src0, undef_src, undef_src); 583} 584 585glsl_to_tgsi_instruction * 586glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) 587{ 588 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 589} 590 591/** 592 * Determines whether to use an integer, unsigned integer, or float opcode 593 * based on the operands and input opcode, then emits the result. 594 * 595 * TODO: type checking for remaining TGSI opcodes 596 */ 597unsigned 598glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, 599 st_dst_reg dst, 600 st_src_reg src0, st_src_reg src1) 601{ 602 int type = GLSL_TYPE_FLOAT; 603 604 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) 605 type = GLSL_TYPE_FLOAT; 606 else if (native_integers) 607 type = src0.type; 608 609#define case4(c, f, i, u) \ 610 case TGSI_OPCODE_##c: \ 611 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ 612 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ 613 else op = TGSI_OPCODE_##f; \ 614 break; 615#define case3(f, i, u) case4(f, f, i, u) 616#define case2fi(f, i) case4(f, f, i, i) 617#define case2iu(i, u) case4(i, LAST, i, u) 618 619 switch(op) { 620 case2fi(ADD, UADD); 621 case2fi(MUL, UMUL); 622 case2fi(MAD, UMAD); 623 case3(DIV, IDIV, UDIV); 624 case3(MAX, IMAX, UMAX); 625 case3(MIN, IMIN, UMIN); 626 case2iu(MOD, UMOD); 627 628 case2fi(SEQ, USEQ); 629 case2fi(SNE, USNE); 630 case3(SGE, ISGE, USGE); 631 case3(SLT, ISLT, USLT); 632 633 case2iu(SHL, SHL); 634 case2iu(ISHR, USHR); 635 case2iu(NOT, NOT); 636 case2iu(AND, AND); 637 case2iu(OR, OR); 638 case2iu(XOR, XOR); 639 640 default: break; 641 } 642 643 assert(op != TGSI_OPCODE_LAST); 644 return op; 645} 646 647glsl_to_tgsi_instruction * 648glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, 649 st_dst_reg dst, st_src_reg src0, st_src_reg src1, 650 unsigned elements) 651{ 652 static const unsigned dot_opcodes[] = { 653 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 654 }; 655 656 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 657} 658 659/** 660 * Emits TGSI scalar opcodes to produce unique answers across channels. 661 * 662 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X 663 * channel determines the result across all channels. So to do a vec4 664 * of this operation, we want to emit a scalar per source channel used 665 * to produce dest channels. 666 */ 667void 668glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 669 st_dst_reg dst, 670 st_src_reg orig_src0, st_src_reg orig_src1) 671{ 672 int i, j; 673 int done_mask = ~dst.writemask; 674 675 /* TGSI RCP is a scalar operation splatting results to all channels, 676 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 677 * dst channels. 678 */ 679 for (i = 0; i < 4; i++) { 680 GLuint this_mask = (1 << i); 681 glsl_to_tgsi_instruction *inst; 682 st_src_reg src0 = orig_src0; 683 st_src_reg src1 = orig_src1; 684 685 if (done_mask & this_mask) 686 continue; 687 688 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 689 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 690 for (j = i + 1; j < 4; j++) { 691 /* If there is another enabled component in the destination that is 692 * derived from the same inputs, generate its value on this pass as 693 * well. 694 */ 695 if (!(done_mask & (1 << j)) && 696 GET_SWZ(src0.swizzle, j) == src0_swiz && 697 GET_SWZ(src1.swizzle, j) == src1_swiz) { 698 this_mask |= (1 << j); 699 } 700 } 701 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 702 src0_swiz, src0_swiz); 703 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 704 src1_swiz, src1_swiz); 705 706 inst = emit(ir, op, dst, src0, src1); 707 inst->dst.writemask = this_mask; 708 done_mask |= this_mask; 709 } 710} 711 712void 713glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 714 st_dst_reg dst, st_src_reg src0) 715{ 716 st_src_reg undef = undef_src; 717 718 undef.swizzle = SWIZZLE_XXXX; 719 720 emit_scalar(ir, op, dst, src0, undef); 721} 722 723void 724glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 725 st_dst_reg dst, st_src_reg src0) 726{ 727 st_src_reg tmp = get_temp(glsl_type::float_type); 728 729 if (src0.type == GLSL_TYPE_INT) 730 emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); 731 else if (src0.type == GLSL_TYPE_UINT) 732 emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); 733 else 734 tmp = src0; 735 736 emit(NULL, TGSI_OPCODE_ARL, dst, tmp); 737} 738 739/** 740 * Emit an TGSI_OPCODE_SCS instruction 741 * 742 * The \c SCS opcode functions a bit differently than the other TGSI opcodes. 743 * Instead of splatting its result across all four components of the 744 * destination, it writes one value to the \c x component and another value to 745 * the \c y component. 746 * 747 * \param ir IR instruction being processed 748 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 749 * on which value is desired. 750 * \param dst Destination register 751 * \param src Source register 752 */ 753void 754glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, 755 st_dst_reg dst, 756 const st_src_reg &src) 757{ 758 /* Vertex programs cannot use the SCS opcode. 759 */ 760 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { 761 emit_scalar(ir, op, dst, src); 762 return; 763 } 764 765 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; 766 const unsigned scs_mask = (1U << component); 767 int done_mask = ~dst.writemask; 768 st_src_reg tmp; 769 770 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); 771 772 /* If there are compnents in the destination that differ from the component 773 * that will be written by the SCS instrution, we'll need a temporary. 774 */ 775 if (scs_mask != unsigned(dst.writemask)) { 776 tmp = get_temp(glsl_type::vec4_type); 777 } 778 779 for (unsigned i = 0; i < 4; i++) { 780 unsigned this_mask = (1U << i); 781 st_src_reg src0 = src; 782 783 if ((done_mask & this_mask) != 0) 784 continue; 785 786 /* The source swizzle specified which component of the source generates 787 * sine / cosine for the current component in the destination. The SCS 788 * instruction requires that this value be swizzle to the X component. 789 * Replace the current swizzle with a swizzle that puts the source in 790 * the X component. 791 */ 792 unsigned src0_swiz = GET_SWZ(src.swizzle, i); 793 794 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 795 src0_swiz, src0_swiz); 796 for (unsigned j = i + 1; j < 4; j++) { 797 /* If there is another enabled component in the destination that is 798 * derived from the same inputs, generate its value on this pass as 799 * well. 800 */ 801 if (!(done_mask & (1 << j)) && 802 GET_SWZ(src0.swizzle, j) == src0_swiz) { 803 this_mask |= (1 << j); 804 } 805 } 806 807 if (this_mask != scs_mask) { 808 glsl_to_tgsi_instruction *inst; 809 st_dst_reg tmp_dst = st_dst_reg(tmp); 810 811 /* Emit the SCS instruction. 812 */ 813 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); 814 inst->dst.writemask = scs_mask; 815 816 /* Move the result of the SCS instruction to the desired location in 817 * the destination. 818 */ 819 tmp.swizzle = MAKE_SWIZZLE4(component, component, 820 component, component); 821 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); 822 inst->dst.writemask = this_mask; 823 } else { 824 /* Emit the SCS instruction to write directly to the destination. 825 */ 826 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); 827 inst->dst.writemask = scs_mask; 828 } 829 830 done_mask |= this_mask; 831 } 832} 833 834int 835glsl_to_tgsi_visitor::add_constant(gl_register_file file, 836 gl_constant_value values[4], int size, int datatype, 837 GLuint *swizzle_out) 838{ 839 if (file == PROGRAM_CONSTANT) { 840 return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, 841 size, datatype, swizzle_out); 842 } else { 843 int index = 0; 844 immediate_storage *entry; 845 assert(file == PROGRAM_IMMEDIATE); 846 847 /* Search immediate storage to see if we already have an identical 848 * immediate that we can use instead of adding a duplicate entry. 849 */ 850 foreach_iter(exec_list_iterator, iter, this->immediates) { 851 entry = (immediate_storage *)iter.get(); 852 853 if (entry->size == size && 854 entry->type == datatype && 855 !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { 856 return index; 857 } 858 index++; 859 } 860 861 /* Add this immediate to the list. */ 862 entry = new(mem_ctx) immediate_storage(values, size, datatype); 863 this->immediates.push_tail(entry); 864 this->num_immediates++; 865 return index; 866 } 867} 868 869st_src_reg 870glsl_to_tgsi_visitor::st_src_reg_for_float(float val) 871{ 872 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); 873 union gl_constant_value uval; 874 875 uval.f = val; 876 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); 877 878 return src; 879} 880 881st_src_reg 882glsl_to_tgsi_visitor::st_src_reg_for_int(int val) 883{ 884 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); 885 union gl_constant_value uval; 886 887 assert(native_integers); 888 889 uval.i = val; 890 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); 891 892 return src; 893} 894 895st_src_reg 896glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) 897{ 898 if (native_integers) 899 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 900 st_src_reg_for_int(val); 901 else 902 return st_src_reg_for_float(val); 903} 904 905static int 906type_size(const struct glsl_type *type) 907{ 908 unsigned int i; 909 int size; 910 911 switch (type->base_type) { 912 case GLSL_TYPE_UINT: 913 case GLSL_TYPE_INT: 914 case GLSL_TYPE_FLOAT: 915 case GLSL_TYPE_BOOL: 916 if (type->is_matrix()) { 917 return type->matrix_columns; 918 } else { 919 /* Regardless of size of vector, it gets a vec4. This is bad 920 * packing for things like floats, but otherwise arrays become a 921 * mess. Hopefully a later pass over the code can pack scalars 922 * down if appropriate. 923 */ 924 return 1; 925 } 926 case GLSL_TYPE_ARRAY: 927 assert(type->length > 0); 928 return type_size(type->fields.array) * type->length; 929 case GLSL_TYPE_STRUCT: 930 size = 0; 931 for (i = 0; i < type->length; i++) { 932 size += type_size(type->fields.structure[i].type); 933 } 934 return size; 935 case GLSL_TYPE_SAMPLER: 936 /* Samplers take up one slot in UNIFORMS[], but they're baked in 937 * at link time. 938 */ 939 return 1; 940 default: 941 assert(0); 942 return 0; 943 } 944} 945 946/** 947 * In the initial pass of codegen, we assign temporary numbers to 948 * intermediate results. (not SSA -- variable assignments will reuse 949 * storage). 950 */ 951st_src_reg 952glsl_to_tgsi_visitor::get_temp(const glsl_type *type) 953{ 954 st_src_reg src; 955 956 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; 957 src.file = PROGRAM_TEMPORARY; 958 src.index = next_temp; 959 src.reladdr = NULL; 960 next_temp += type_size(type); 961 962 if (type->is_array() || type->is_record()) { 963 src.swizzle = SWIZZLE_NOOP; 964 } else { 965 src.swizzle = swizzle_for_size(type->vector_elements); 966 } 967 src.negate = 0; 968 969 return src; 970} 971 972variable_storage * 973glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 974{ 975 976 variable_storage *entry; 977 978 foreach_iter(exec_list_iterator, iter, this->variables) { 979 entry = (variable_storage *)iter.get(); 980 981 if (entry->var == var) 982 return entry; 983 } 984 985 return NULL; 986} 987 988void 989glsl_to_tgsi_visitor::visit(ir_variable *ir) 990{ 991 if (strcmp(ir->name, "gl_FragCoord") == 0) { 992 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 993 994 fp->OriginUpperLeft = ir->origin_upper_left; 995 fp->PixelCenterInteger = ir->pixel_center_integer; 996 997 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 998 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 999 switch (ir->depth_layout) { 1000 case ir_depth_layout_none: 1001 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; 1002 break; 1003 case ir_depth_layout_any: 1004 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; 1005 break; 1006 case ir_depth_layout_greater: 1007 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; 1008 break; 1009 case ir_depth_layout_less: 1010 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; 1011 break; 1012 case ir_depth_layout_unchanged: 1013 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; 1014 break; 1015 default: 1016 assert(0); 1017 break; 1018 } 1019 } 1020 1021 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 1022 unsigned int i; 1023 const ir_state_slot *const slots = ir->state_slots; 1024 assert(ir->state_slots != NULL); 1025 1026 /* Check if this statevar's setup in the STATE file exactly 1027 * matches how we'll want to reference it as a 1028 * struct/array/whatever. If not, then we need to move it into 1029 * temporary storage and hope that it'll get copy-propagated 1030 * out. 1031 */ 1032 for (i = 0; i < ir->num_state_slots; i++) { 1033 if (slots[i].swizzle != SWIZZLE_XYZW) { 1034 break; 1035 } 1036 } 1037 1038 variable_storage *storage; 1039 st_dst_reg dst; 1040 if (i == ir->num_state_slots) { 1041 /* We'll set the index later. */ 1042 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 1043 this->variables.push_tail(storage); 1044 1045 dst = undef_dst; 1046 } else { 1047 /* The variable_storage constructor allocates slots based on the size 1048 * of the type. However, this had better match the number of state 1049 * elements that we're going to copy into the new temporary. 1050 */ 1051 assert((int) ir->num_state_slots == type_size(ir->type)); 1052 1053 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 1054 this->next_temp); 1055 this->variables.push_tail(storage); 1056 this->next_temp += type_size(ir->type); 1057 1058 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, 1059 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); 1060 } 1061 1062 1063 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 1064 int index = _mesa_add_state_reference(this->prog->Parameters, 1065 (gl_state_index *)slots[i].tokens); 1066 1067 if (storage->file == PROGRAM_STATE_VAR) { 1068 if (storage->index == -1) { 1069 storage->index = index; 1070 } else { 1071 assert(index == storage->index + (int)i); 1072 } 1073 } else { 1074 st_src_reg src(PROGRAM_STATE_VAR, index, 1075 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); 1076 src.swizzle = slots[i].swizzle; 1077 emit(ir, TGSI_OPCODE_MOV, dst, src); 1078 /* even a float takes up a whole vec4 reg in a struct/array. */ 1079 dst.index++; 1080 } 1081 } 1082 1083 if (storage->file == PROGRAM_TEMPORARY && 1084 dst.index != storage->index + (int) ir->num_state_slots) { 1085 fail_link(this->shader_program, 1086 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1087 ir->name, dst.index - storage->index, 1088 type_size(ir->type)); 1089 } 1090 } 1091} 1092 1093void 1094glsl_to_tgsi_visitor::visit(ir_loop *ir) 1095{ 1096 ir_dereference_variable *counter = NULL; 1097 1098 if (ir->counter != NULL) 1099 counter = new(ir) ir_dereference_variable(ir->counter); 1100 1101 if (ir->from != NULL) { 1102 assert(ir->counter != NULL); 1103 1104 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 1105 1106 a->accept(this); 1107 delete a; 1108 } 1109 1110 emit(NULL, TGSI_OPCODE_BGNLOOP); 1111 1112 if (ir->to) { 1113 ir_expression *e = 1114 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 1115 counter, ir->to); 1116 ir_if *if_stmt = new(ir) ir_if(e); 1117 1118 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 1119 1120 if_stmt->then_instructions.push_tail(brk); 1121 1122 if_stmt->accept(this); 1123 1124 delete if_stmt; 1125 delete e; 1126 delete brk; 1127 } 1128 1129 visit_exec_list(&ir->body_instructions, this); 1130 1131 if (ir->increment) { 1132 ir_expression *e = 1133 new(ir) ir_expression(ir_binop_add, counter->type, 1134 counter, ir->increment); 1135 1136 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 1137 1138 a->accept(this); 1139 delete a; 1140 delete e; 1141 } 1142 1143 emit(NULL, TGSI_OPCODE_ENDLOOP); 1144} 1145 1146void 1147glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 1148{ 1149 switch (ir->mode) { 1150 case ir_loop_jump::jump_break: 1151 emit(NULL, TGSI_OPCODE_BRK); 1152 break; 1153 case ir_loop_jump::jump_continue: 1154 emit(NULL, TGSI_OPCODE_CONT); 1155 break; 1156 } 1157} 1158 1159 1160void 1161glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1162{ 1163 assert(0); 1164 (void)ir; 1165} 1166 1167void 1168glsl_to_tgsi_visitor::visit(ir_function *ir) 1169{ 1170 /* Ignore function bodies other than main() -- we shouldn't see calls to 1171 * them since they should all be inlined before we get to glsl_to_tgsi. 1172 */ 1173 if (strcmp(ir->name, "main") == 0) { 1174 const ir_function_signature *sig; 1175 exec_list empty; 1176 1177 sig = ir->matching_signature(&empty); 1178 1179 assert(sig); 1180 1181 foreach_iter(exec_list_iterator, iter, sig->body) { 1182 ir_instruction *ir = (ir_instruction *)iter.get(); 1183 1184 ir->accept(this); 1185 } 1186 } 1187} 1188 1189bool 1190glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1191{ 1192 int nonmul_operand = 1 - mul_operand; 1193 st_src_reg a, b, c; 1194 st_dst_reg result_dst; 1195 1196 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1197 if (!expr || expr->operation != ir_binop_mul) 1198 return false; 1199 1200 expr->operands[0]->accept(this); 1201 a = this->result; 1202 expr->operands[1]->accept(this); 1203 b = this->result; 1204 ir->operands[nonmul_operand]->accept(this); 1205 c = this->result; 1206 1207 this->result = get_temp(ir->type); 1208 result_dst = st_dst_reg(this->result); 1209 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1210 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1211 1212 return true; 1213} 1214 1215/** 1216 * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) 1217 * 1218 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 1219 * implemented using multiplication, and logical-or is implemented using 1220 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 1221 * As result, the logical expression (a & !b) can be rewritten as: 1222 * 1223 * - a * !b 1224 * - a * (1 - b) 1225 * - (a * 1) - (a * b) 1226 * - a + -(a * b) 1227 * - a + (a * -b) 1228 * 1229 * This final expression can be implemented as a single MAD(a, -b, a) 1230 * instruction. 1231 */ 1232bool 1233glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 1234{ 1235 const int other_operand = 1 - try_operand; 1236 st_src_reg a, b; 1237 1238 ir_expression *expr = ir->operands[try_operand]->as_expression(); 1239 if (!expr || expr->operation != ir_unop_logic_not) 1240 return false; 1241 1242 ir->operands[other_operand]->accept(this); 1243 a = this->result; 1244 expr->operands[0]->accept(this); 1245 b = this->result; 1246 1247 b.negate = ~b.negate; 1248 1249 this->result = get_temp(ir->type); 1250 emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); 1251 1252 return true; 1253} 1254 1255bool 1256glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) 1257{ 1258 /* Saturates were only introduced to vertex programs in 1259 * NV_vertex_program3, so don't give them to drivers in the VP. 1260 */ 1261 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) 1262 return false; 1263 1264 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 1265 if (!sat_src) 1266 return false; 1267 1268 sat_src->accept(this); 1269 st_src_reg src = this->result; 1270 1271 /* If we generated an expression instruction into a temporary in 1272 * processing the saturate's operand, apply the saturate to that 1273 * instruction. Otherwise, generate a MOV to do the saturate. 1274 * 1275 * Note that we have to be careful to only do this optimization if 1276 * the instruction in question was what generated src->result. For 1277 * example, ir_dereference_array might generate a MUL instruction 1278 * to create the reladdr, and return us a src reg using that 1279 * reladdr. That MUL result is not the value we're trying to 1280 * saturate. 1281 */ 1282 ir_expression *sat_src_expr = sat_src->as_expression(); 1283 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || 1284 sat_src_expr->operation == ir_binop_add || 1285 sat_src_expr->operation == ir_binop_dot)) { 1286 glsl_to_tgsi_instruction *new_inst; 1287 new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 1288 new_inst->saturate = true; 1289 } else { 1290 this->result = get_temp(ir->type); 1291 st_dst_reg result_dst = st_dst_reg(this->result); 1292 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1293 glsl_to_tgsi_instruction *inst; 1294 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); 1295 inst->saturate = true; 1296 } 1297 1298 return true; 1299} 1300 1301void 1302glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1303 st_src_reg *reg, int *num_reladdr) 1304{ 1305 if (!reg->reladdr) 1306 return; 1307 1308 emit_arl(ir, address_reg, *reg->reladdr); 1309 1310 if (*num_reladdr != 1) { 1311 st_src_reg temp = get_temp(glsl_type::vec4_type); 1312 1313 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); 1314 *reg = temp; 1315 } 1316 1317 (*num_reladdr)--; 1318} 1319 1320void 1321glsl_to_tgsi_visitor::visit(ir_expression *ir) 1322{ 1323 unsigned int operand; 1324 st_src_reg op[Elements(ir->operands)]; 1325 st_src_reg result_src; 1326 st_dst_reg result_dst; 1327 1328 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) 1329 */ 1330 if (ir->operation == ir_binop_add) { 1331 if (try_emit_mad(ir, 1)) 1332 return; 1333 if (try_emit_mad(ir, 0)) 1334 return; 1335 } 1336 1337 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 1338 */ 1339 if (ir->operation == ir_binop_logic_and) { 1340 if (try_emit_mad_for_and_not(ir, 1)) 1341 return; 1342 if (try_emit_mad_for_and_not(ir, 0)) 1343 return; 1344 } 1345 1346 if (try_emit_sat(ir)) 1347 return; 1348 1349 if (ir->operation == ir_quadop_vector) 1350 assert(!"ir_quadop_vector should have been lowered"); 1351 1352 for (operand = 0; operand < ir->get_num_operands(); operand++) { 1353 this->result.file = PROGRAM_UNDEFINED; 1354 ir->operands[operand]->accept(this); 1355 if (this->result.file == PROGRAM_UNDEFINED) { 1356 ir_print_visitor v; 1357 printf("Failed to get tree for expression operand:\n"); 1358 ir->operands[operand]->accept(&v); 1359 exit(1); 1360 } 1361 op[operand] = this->result; 1362 1363 /* Matrix expression operands should have been broken down to vector 1364 * operations already. 1365 */ 1366 assert(!ir->operands[operand]->type->is_matrix()); 1367 } 1368 1369 int vector_elements = ir->operands[0]->type->vector_elements; 1370 if (ir->operands[1]) { 1371 vector_elements = MAX2(vector_elements, 1372 ir->operands[1]->type->vector_elements); 1373 } 1374 1375 this->result.file = PROGRAM_UNDEFINED; 1376 1377 /* Storage for our result. Ideally for an assignment we'd be using 1378 * the actual storage for the result here, instead. 1379 */ 1380 result_src = get_temp(ir->type); 1381 /* convenience for the emit functions below. */ 1382 result_dst = st_dst_reg(result_src); 1383 /* Limit writes to the channels that will be used by result_src later. 1384 * This does limit this temp's use as a temporary for multi-instruction 1385 * sequences. 1386 */ 1387 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1388 1389 switch (ir->operation) { 1390 case ir_unop_logic_not: 1391 if (result_dst.type != GLSL_TYPE_FLOAT) 1392 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); 1393 else { 1394 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 1395 * older GPUs implement SEQ using multiple instructions (i915 uses two 1396 * SGE instructions and a MUL instruction). Since our logic values are 1397 * 0.0 and 1.0, 1-x also implements !x. 1398 */ 1399 op[0].negate = ~op[0].negate; 1400 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); 1401 } 1402 break; 1403 case ir_unop_neg: 1404 assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); 1405 if (result_dst.type == GLSL_TYPE_INT) 1406 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1407 else { 1408 op[0].negate = ~op[0].negate; 1409 result_src = op[0]; 1410 } 1411 break; 1412 case ir_unop_abs: 1413 assert(result_dst.type == GLSL_TYPE_FLOAT); 1414 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); 1415 break; 1416 case ir_unop_sign: 1417 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); 1418 break; 1419 case ir_unop_rcp: 1420 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); 1421 break; 1422 1423 case ir_unop_exp2: 1424 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); 1425 break; 1426 case ir_unop_exp: 1427 case ir_unop_log: 1428 assert(!"not reached: should be handled by ir_explog_to_explog2"); 1429 break; 1430 case ir_unop_log2: 1431 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); 1432 break; 1433 case ir_unop_sin: 1434 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1435 break; 1436 case ir_unop_cos: 1437 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1438 break; 1439 case ir_unop_sin_reduced: 1440 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1441 break; 1442 case ir_unop_cos_reduced: 1443 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1444 break; 1445 1446 case ir_unop_dFdx: 1447 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); 1448 break; 1449 case ir_unop_dFdy: 1450 op[0].negate = ~op[0].negate; 1451 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); 1452 break; 1453 1454 case ir_unop_noise: { 1455 /* At some point, a motivated person could add a better 1456 * implementation of noise. Currently not even the nvidia 1457 * binary drivers do anything more than this. In any case, the 1458 * place to do this is in the GL state tracker, not the poor 1459 * driver. 1460 */ 1461 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); 1462 break; 1463 } 1464 1465 case ir_binop_add: 1466 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1467 break; 1468 case ir_binop_sub: 1469 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); 1470 break; 1471 1472 case ir_binop_mul: 1473 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1474 break; 1475 case ir_binop_div: 1476 if (result_dst.type == GLSL_TYPE_FLOAT) 1477 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1478 else 1479 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); 1480 break; 1481 case ir_binop_mod: 1482 if (result_dst.type == GLSL_TYPE_FLOAT) 1483 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1484 else 1485 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); 1486 break; 1487 1488 case ir_binop_less: 1489 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); 1490 break; 1491 case ir_binop_greater: 1492 emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); 1493 break; 1494 case ir_binop_lequal: 1495 emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]); 1496 break; 1497 case ir_binop_gequal: 1498 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); 1499 break; 1500 case ir_binop_equal: 1501 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1502 break; 1503 case ir_binop_nequal: 1504 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1505 break; 1506 case ir_binop_all_equal: 1507 /* "==" operator producing a scalar boolean. */ 1508 if (ir->operands[0]->type->is_vector() || 1509 ir->operands[1]->type->is_vector()) { 1510 st_src_reg temp = get_temp(native_integers ? 1511 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1512 glsl_type::vec4_type); 1513 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); 1514 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1515 1516 /* After the dot-product, the value will be an integer on the 1517 * range [0,4]. Zero becomes 1.0, and positive values become zero. 1518 */ 1519 emit_dp(ir, result_dst, temp, temp, vector_elements); 1520 1521 if (result_dst.type == GLSL_TYPE_FLOAT) { 1522 /* Negating the result of the dot-product gives values on the range 1523 * [-4, 0]. Zero becomes 1.0, and negative values become zero. 1524 * This is achieved using SGE. 1525 */ 1526 st_src_reg sge_src = result_src; 1527 sge_src.negate = ~sge_src.negate; 1528 emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); 1529 } else { 1530 /* The TGSI negate flag doesn't work for integers, so use SEQ 0 1531 * instead. 1532 */ 1533 emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_int(0)); 1534 } 1535 } else { 1536 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1537 } 1538 break; 1539 case ir_binop_any_nequal: 1540 /* "!=" operator producing a scalar boolean. */ 1541 if (ir->operands[0]->type->is_vector() || 1542 ir->operands[1]->type->is_vector()) { 1543 st_src_reg temp = get_temp(native_integers ? 1544 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1545 glsl_type::vec4_type); 1546 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); 1547 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1548 1549 /* After the dot-product, the value will be an integer on the 1550 * range [0,4]. Zero stays zero, and positive values become 1.0. 1551 */ 1552 glsl_to_tgsi_instruction *const dp = 1553 emit_dp(ir, result_dst, temp, temp, vector_elements); 1554 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 1555 result_dst.type == GLSL_TYPE_FLOAT) { 1556 /* The clamping to [0,1] can be done for free in the fragment 1557 * shader with a saturate. 1558 */ 1559 dp->saturate = true; 1560 } else if (result_dst.type == GLSL_TYPE_FLOAT) { 1561 /* Negating the result of the dot-product gives values on the range 1562 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1563 * achieved using SLT. 1564 */ 1565 st_src_reg slt_src = result_src; 1566 slt_src.negate = ~slt_src.negate; 1567 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1568 } else { 1569 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); 1570 } 1571 } else { 1572 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1573 } 1574 break; 1575 1576 case ir_unop_any: { 1577 assert(ir->operands[0]->type->is_vector()); 1578 1579 /* After the dot-product, the value will be an integer on the 1580 * range [0,4]. Zero stays zero, and positive values become 1.0. 1581 */ 1582 glsl_to_tgsi_instruction *const dp = 1583 emit_dp(ir, result_dst, op[0], op[0], 1584 ir->operands[0]->type->vector_elements); 1585 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 1586 result_dst.type == GLSL_TYPE_FLOAT) { 1587 /* The clamping to [0,1] can be done for free in the fragment 1588 * shader with a saturate. 1589 */ 1590 dp->saturate = true; 1591 } else if (result_dst.type == GLSL_TYPE_FLOAT) { 1592 /* Negating the result of the dot-product gives values on the range 1593 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1594 * is achieved using SLT. 1595 */ 1596 st_src_reg slt_src = result_src; 1597 slt_src.negate = ~slt_src.negate; 1598 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1599 } 1600 else { 1601 /* Use SNE 0 if integers are being used as boolean values. */ 1602 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); 1603 } 1604 break; 1605 } 1606 1607 case ir_binop_logic_xor: 1608 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1609 break; 1610 1611 case ir_binop_logic_or: { 1612 /* After the addition, the value will be an integer on the 1613 * range [0,2]. Zero stays zero, and positive values become 1.0. 1614 */ 1615 glsl_to_tgsi_instruction *add = 1616 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1617 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 1618 result_dst.type == GLSL_TYPE_FLOAT) { 1619 /* The clamping to [0,1] can be done for free in the fragment 1620 * shader with a saturate if floats are being used as boolean values. 1621 */ 1622 add->saturate = true; 1623 } else if (result_dst.type == GLSL_TYPE_FLOAT) { 1624 /* Negating the result of the addition gives values on the range 1625 * [-2, 0]. Zero stays zero, and negative values become 1.0. This 1626 * is achieved using SLT. 1627 */ 1628 st_src_reg slt_src = result_src; 1629 slt_src.negate = ~slt_src.negate; 1630 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1631 } else { 1632 /* Use an SNE on the result of the addition. Zero stays zero, 1633 * 1 stays 1, and 2 becomes 1. 1634 */ 1635 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); 1636 } 1637 break; 1638 } 1639 1640 case ir_binop_logic_and: 1641 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ 1642 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1643 break; 1644 1645 case ir_binop_dot: 1646 assert(ir->operands[0]->type->is_vector()); 1647 assert(ir->operands[0]->type == ir->operands[1]->type); 1648 emit_dp(ir, result_dst, op[0], op[1], 1649 ir->operands[0]->type->vector_elements); 1650 break; 1651 1652 case ir_unop_sqrt: 1653 /* sqrt(x) = x * rsq(x). */ 1654 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1655 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); 1656 /* For incoming channels <= 0, set the result to 0. */ 1657 op[0].negate = ~op[0].negate; 1658 emit(ir, TGSI_OPCODE_CMP, result_dst, 1659 op[0], result_src, st_src_reg_for_float(0.0)); 1660 break; 1661 case ir_unop_rsq: 1662 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1663 break; 1664 case ir_unop_i2f: 1665 case ir_unop_b2f: 1666 if (native_integers) { 1667 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1668 break; 1669 } 1670 case ir_unop_i2u: 1671 case ir_unop_u2i: 1672 /* Converting between signed and unsigned integers is a no-op. */ 1673 case ir_unop_b2i: 1674 /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */ 1675 result_src = op[0]; 1676 break; 1677 case ir_unop_f2i: 1678 if (native_integers) 1679 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1680 else 1681 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1682 break; 1683 case ir_unop_f2b: 1684 case ir_unop_i2b: 1685 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 1686 st_src_reg_for_type(result_dst.type, 0)); 1687 break; 1688 case ir_unop_trunc: 1689 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1690 break; 1691 case ir_unop_ceil: 1692 op[0].negate = ~op[0].negate; 1693 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1694 result_src.negate = ~result_src.negate; 1695 break; 1696 case ir_unop_floor: 1697 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1698 break; 1699 case ir_unop_fract: 1700 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 1701 break; 1702 1703 case ir_binop_min: 1704 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 1705 break; 1706 case ir_binop_max: 1707 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 1708 break; 1709 case ir_binop_pow: 1710 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 1711 break; 1712 1713 case ir_unop_bit_not: 1714 if (glsl_version >= 130) { 1715 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1716 break; 1717 } 1718 case ir_unop_u2f: 1719 if (native_integers) { 1720 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 1721 break; 1722 } 1723 case ir_binop_lshift: 1724 if (glsl_version >= 130) { 1725 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); 1726 break; 1727 } 1728 case ir_binop_rshift: 1729 if (glsl_version >= 130) { 1730 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); 1731 break; 1732 } 1733 case ir_binop_bit_and: 1734 if (glsl_version >= 130) { 1735 emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); 1736 break; 1737 } 1738 case ir_binop_bit_xor: 1739 if (glsl_version >= 130) { 1740 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); 1741 break; 1742 } 1743 case ir_binop_bit_or: 1744 if (glsl_version >= 130) { 1745 emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); 1746 break; 1747 } 1748 case ir_unop_round_even: 1749 assert(!"GLSL 1.30 features unsupported"); 1750 break; 1751 1752 case ir_quadop_vector: 1753 /* This operation should have already been handled. 1754 */ 1755 assert(!"Should not get here."); 1756 break; 1757 } 1758 1759 this->result = result_src; 1760} 1761 1762 1763void 1764glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 1765{ 1766 st_src_reg src; 1767 int i; 1768 int swizzle[4]; 1769 1770 /* Note that this is only swizzles in expressions, not those on the left 1771 * hand side of an assignment, which do write masking. See ir_assignment 1772 * for that. 1773 */ 1774 1775 ir->val->accept(this); 1776 src = this->result; 1777 assert(src.file != PROGRAM_UNDEFINED); 1778 1779 for (i = 0; i < 4; i++) { 1780 if (i < ir->type->vector_elements) { 1781 switch (i) { 1782 case 0: 1783 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1784 break; 1785 case 1: 1786 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1787 break; 1788 case 2: 1789 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1790 break; 1791 case 3: 1792 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1793 break; 1794 } 1795 } else { 1796 /* If the type is smaller than a vec4, replicate the last 1797 * channel out. 1798 */ 1799 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1800 } 1801 } 1802 1803 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1804 1805 this->result = src; 1806} 1807 1808void 1809glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 1810{ 1811 variable_storage *entry = find_variable_storage(ir->var); 1812 ir_variable *var = ir->var; 1813 1814 if (!entry) { 1815 switch (var->mode) { 1816 case ir_var_uniform: 1817 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1818 var->location); 1819 this->variables.push_tail(entry); 1820 break; 1821 case ir_var_in: 1822 case ir_var_inout: 1823 /* The linker assigns locations for varyings and attributes, 1824 * including deprecated builtins (like gl_Color), user-assign 1825 * generic attributes (glBindVertexLocation), and 1826 * user-defined varyings. 1827 * 1828 * FINISHME: We would hit this path for function arguments. Fix! 1829 */ 1830 assert(var->location != -1); 1831 entry = new(mem_ctx) variable_storage(var, 1832 PROGRAM_INPUT, 1833 var->location); 1834 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && 1835 var->location >= VERT_ATTRIB_GENERIC0) { 1836 _mesa_add_attribute(this->prog->Attributes, 1837 var->name, 1838 _mesa_sizeof_glsl_type(var->type->gl_type), 1839 var->type->gl_type, 1840 var->location - VERT_ATTRIB_GENERIC0); 1841 } 1842 break; 1843 case ir_var_out: 1844 assert(var->location != -1); 1845 entry = new(mem_ctx) variable_storage(var, 1846 PROGRAM_OUTPUT, 1847 var->location); 1848 break; 1849 case ir_var_system_value: 1850 entry = new(mem_ctx) variable_storage(var, 1851 PROGRAM_SYSTEM_VALUE, 1852 var->location); 1853 break; 1854 case ir_var_auto: 1855 case ir_var_temporary: 1856 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1857 this->next_temp); 1858 this->variables.push_tail(entry); 1859 1860 next_temp += type_size(var->type); 1861 break; 1862 } 1863 1864 if (!entry) { 1865 printf("Failed to make storage for %s\n", var->name); 1866 exit(1); 1867 } 1868 } 1869 1870 this->result = st_src_reg(entry->file, entry->index, var->type); 1871 if (!native_integers) 1872 this->result.type = GLSL_TYPE_FLOAT; 1873} 1874 1875void 1876glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 1877{ 1878 ir_constant *index; 1879 st_src_reg src; 1880 int element_size = type_size(ir->type); 1881 1882 index = ir->array_index->constant_expression_value(); 1883 1884 ir->array->accept(this); 1885 src = this->result; 1886 1887 if (index) { 1888 src.index += index->value.i[0] * element_size; 1889 } else { 1890 /* Variable index array dereference. It eats the "vec4" of the 1891 * base of the array and an index that offsets the TGSI register 1892 * index. 1893 */ 1894 ir->array_index->accept(this); 1895 1896 st_src_reg index_reg; 1897 1898 if (element_size == 1) { 1899 index_reg = this->result; 1900 } else { 1901 index_reg = get_temp(glsl_type::float_type); 1902 1903 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 1904 this->result, st_src_reg_for_float(element_size)); 1905 } 1906 1907 /* If there was already a relative address register involved, add the 1908 * new and the old together to get the new offset. 1909 */ 1910 if (src.reladdr != NULL) { 1911 st_src_reg accum_reg = get_temp(glsl_type::float_type); 1912 1913 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), 1914 index_reg, *src.reladdr); 1915 1916 index_reg = accum_reg; 1917 } 1918 1919 src.reladdr = ralloc(mem_ctx, st_src_reg); 1920 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1921 } 1922 1923 /* If the type is smaller than a vec4, replicate the last channel out. */ 1924 if (ir->type->is_scalar() || ir->type->is_vector()) 1925 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1926 else 1927 src.swizzle = SWIZZLE_NOOP; 1928 1929 this->result = src; 1930} 1931 1932void 1933glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 1934{ 1935 unsigned int i; 1936 const glsl_type *struct_type = ir->record->type; 1937 int offset = 0; 1938 1939 ir->record->accept(this); 1940 1941 for (i = 0; i < struct_type->length; i++) { 1942 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 1943 break; 1944 offset += type_size(struct_type->fields.structure[i].type); 1945 } 1946 1947 /* If the type is smaller than a vec4, replicate the last channel out. */ 1948 if (ir->type->is_scalar() || ir->type->is_vector()) 1949 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1950 else 1951 this->result.swizzle = SWIZZLE_NOOP; 1952 1953 this->result.index += offset; 1954} 1955 1956/** 1957 * We want to be careful in assignment setup to hit the actual storage 1958 * instead of potentially using a temporary like we might with the 1959 * ir_dereference handler. 1960 */ 1961static st_dst_reg 1962get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) 1963{ 1964 /* The LHS must be a dereference. If the LHS is a variable indexed array 1965 * access of a vector, it must be separated into a series conditional moves 1966 * before reaching this point (see ir_vec_index_to_cond_assign). 1967 */ 1968 assert(ir->as_dereference()); 1969 ir_dereference_array *deref_array = ir->as_dereference_array(); 1970 if (deref_array) { 1971 assert(!deref_array->array->type->is_vector()); 1972 } 1973 1974 /* Use the rvalue deref handler for the most part. We'll ignore 1975 * swizzles in it and write swizzles using writemask, though. 1976 */ 1977 ir->accept(v); 1978 return st_dst_reg(v->result); 1979} 1980 1981/** 1982 * Process the condition of a conditional assignment 1983 * 1984 * Examines the condition of a conditional assignment to generate the optimal 1985 * first operand of a \c CMP instruction. If the condition is a relational 1986 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 1987 * used as the source for the \c CMP instruction. Otherwise the comparison 1988 * is processed to a boolean result, and the boolean result is used as the 1989 * operand to the CMP instruction. 1990 */ 1991bool 1992glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 1993{ 1994 ir_rvalue *src_ir = ir; 1995 bool negate = true; 1996 bool switch_order = false; 1997 1998 ir_expression *const expr = ir->as_expression(); 1999 if ((expr != NULL) && (expr->get_num_operands() == 2)) { 2000 bool zero_on_left = false; 2001 2002 if (expr->operands[0]->is_zero()) { 2003 src_ir = expr->operands[1]; 2004 zero_on_left = true; 2005 } else if (expr->operands[1]->is_zero()) { 2006 src_ir = expr->operands[0]; 2007 zero_on_left = false; 2008 } 2009 2010 /* a is - 0 + - 0 + 2011 * (a < 0) T F F ( a < 0) T F F 2012 * (0 < a) F F T (-a < 0) F F T 2013 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) 2014 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) 2015 * (a > 0) F F T (-a < 0) F F T 2016 * (0 > a) T F F ( a < 0) T F F 2017 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 2018 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 2019 * 2020 * Note that exchanging the order of 0 and 'a' in the comparison simply 2021 * means that the value of 'a' should be negated. 2022 */ 2023 if (src_ir != ir) { 2024 switch (expr->operation) { 2025 case ir_binop_less: 2026 switch_order = false; 2027 negate = zero_on_left; 2028 break; 2029 2030 case ir_binop_greater: 2031 switch_order = false; 2032 negate = !zero_on_left; 2033 break; 2034 2035 case ir_binop_lequal: 2036 switch_order = true; 2037 negate = !zero_on_left; 2038 break; 2039 2040 case ir_binop_gequal: 2041 switch_order = true; 2042 negate = zero_on_left; 2043 break; 2044 2045 default: 2046 /* This isn't the right kind of comparison afterall, so make sure 2047 * the whole condition is visited. 2048 */ 2049 src_ir = ir; 2050 break; 2051 } 2052 } 2053 } 2054 2055 src_ir->accept(this); 2056 2057 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 2058 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 2059 * choose which value TGSI_OPCODE_CMP produces without an extra instruction 2060 * computing the condition. 2061 */ 2062 if (negate) 2063 this->result.negate = ~this->result.negate; 2064 2065 return switch_order; 2066} 2067 2068void 2069glsl_to_tgsi_visitor::visit(ir_assignment *ir) 2070{ 2071 st_dst_reg l; 2072 st_src_reg r; 2073 int i; 2074 2075 ir->rhs->accept(this); 2076 r = this->result; 2077 2078 l = get_assignment_lhs(ir->lhs, this); 2079 2080 /* FINISHME: This should really set to the correct maximal writemask for each 2081 * FINISHME: component written (in the loops below). This case can only 2082 * FINISHME: occur for matrices, arrays, and structures. 2083 */ 2084 if (ir->write_mask == 0) { 2085 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 2086 l.writemask = WRITEMASK_XYZW; 2087 } else if (ir->lhs->type->is_scalar() && 2088 ir->lhs->variable_referenced()->mode == ir_var_out) { 2089 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 2090 * FINISHME: W component of fragment shader output zero, work correctly. 2091 */ 2092 l.writemask = WRITEMASK_XYZW; 2093 } else { 2094 int swizzles[4]; 2095 int first_enabled_chan = 0; 2096 int rhs_chan = 0; 2097 2098 l.writemask = ir->write_mask; 2099 2100 for (int i = 0; i < 4; i++) { 2101 if (l.writemask & (1 << i)) { 2102 first_enabled_chan = GET_SWZ(r.swizzle, i); 2103 break; 2104 } 2105 } 2106 2107 /* Swizzle a small RHS vector into the channels being written. 2108 * 2109 * glsl ir treats write_mask as dictating how many channels are 2110 * present on the RHS while TGSI treats write_mask as just 2111 * showing which channels of the vec4 RHS get written. 2112 */ 2113 for (int i = 0; i < 4; i++) { 2114 if (l.writemask & (1 << i)) 2115 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 2116 else 2117 swizzles[i] = first_enabled_chan; 2118 } 2119 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 2120 swizzles[2], swizzles[3]); 2121 } 2122 2123 assert(l.file != PROGRAM_UNDEFINED); 2124 assert(r.file != PROGRAM_UNDEFINED); 2125 2126 if (ir->condition) { 2127 const bool switch_order = this->process_move_condition(ir->condition); 2128 st_src_reg condition = this->result; 2129 2130 for (i = 0; i < type_size(ir->lhs->type); i++) { 2131 st_src_reg l_src = st_src_reg(l); 2132 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); 2133 2134 if (switch_order) { 2135 emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); 2136 } else { 2137 emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); 2138 } 2139 2140 l.index++; 2141 r.index++; 2142 } 2143 } else if (ir->rhs->as_expression() && 2144 this->instructions.get_tail() && 2145 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 2146 type_size(ir->lhs->type) == 1 && 2147 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { 2148 /* To avoid emitting an extra MOV when assigning an expression to a 2149 * variable, emit the last instruction of the expression again, but 2150 * replace the destination register with the target of the assignment. 2151 * Dead code elimination will remove the original instruction. 2152 */ 2153 glsl_to_tgsi_instruction *inst, *new_inst; 2154 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2155 new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); 2156 new_inst->saturate = inst->saturate; 2157 } else { 2158 for (i = 0; i < type_size(ir->lhs->type); i++) { 2159 emit(ir, TGSI_OPCODE_MOV, l, r); 2160 l.index++; 2161 r.index++; 2162 } 2163 } 2164} 2165 2166 2167void 2168glsl_to_tgsi_visitor::visit(ir_constant *ir) 2169{ 2170 st_src_reg src; 2171 GLfloat stack_vals[4] = { 0 }; 2172 gl_constant_value *values = (gl_constant_value *) stack_vals; 2173 GLenum gl_type = GL_NONE; 2174 unsigned int i; 2175 static int in_array = 0; 2176 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 2177 2178 /* Unfortunately, 4 floats is all we can get into 2179 * _mesa_add_typed_unnamed_constant. So, make a temp to store an 2180 * aggregate constant and move each constant value into it. If we 2181 * get lucky, copy propagation will eliminate the extra moves. 2182 */ 2183 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 2184 st_src_reg temp_base = get_temp(ir->type); 2185 st_dst_reg temp = st_dst_reg(temp_base); 2186 2187 foreach_iter(exec_list_iterator, iter, ir->components) { 2188 ir_constant *field_value = (ir_constant *)iter.get(); 2189 int size = type_size(field_value->type); 2190 2191 assert(size > 0); 2192 2193 field_value->accept(this); 2194 src = this->result; 2195 2196 for (i = 0; i < (unsigned int)size; i++) { 2197 emit(ir, TGSI_OPCODE_MOV, temp, src); 2198 2199 src.index++; 2200 temp.index++; 2201 } 2202 } 2203 this->result = temp_base; 2204 return; 2205 } 2206 2207 if (ir->type->is_array()) { 2208 st_src_reg temp_base = get_temp(ir->type); 2209 st_dst_reg temp = st_dst_reg(temp_base); 2210 int size = type_size(ir->type->fields.array); 2211 2212 assert(size > 0); 2213 in_array++; 2214 2215 for (i = 0; i < ir->type->length; i++) { 2216 ir->array_elements[i]->accept(this); 2217 src = this->result; 2218 for (int j = 0; j < size; j++) { 2219 emit(ir, TGSI_OPCODE_MOV, temp, src); 2220 2221 src.index++; 2222 temp.index++; 2223 } 2224 } 2225 this->result = temp_base; 2226 in_array--; 2227 return; 2228 } 2229 2230 if (ir->type->is_matrix()) { 2231 st_src_reg mat = get_temp(ir->type); 2232 st_dst_reg mat_column = st_dst_reg(mat); 2233 2234 for (i = 0; i < ir->type->matrix_columns; i++) { 2235 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 2236 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 2237 2238 src = st_src_reg(file, -1, ir->type->base_type); 2239 src.index = add_constant(file, 2240 values, 2241 ir->type->vector_elements, 2242 GL_FLOAT, 2243 &src.swizzle); 2244 emit(ir, TGSI_OPCODE_MOV, mat_column, src); 2245 2246 mat_column.index++; 2247 } 2248 2249 this->result = mat; 2250 return; 2251 } 2252 2253 switch (ir->type->base_type) { 2254 case GLSL_TYPE_FLOAT: 2255 gl_type = GL_FLOAT; 2256 for (i = 0; i < ir->type->vector_elements; i++) { 2257 values[i].f = ir->value.f[i]; 2258 } 2259 break; 2260 case GLSL_TYPE_UINT: 2261 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; 2262 for (i = 0; i < ir->type->vector_elements; i++) { 2263 if (native_integers) 2264 values[i].u = ir->value.u[i]; 2265 else 2266 values[i].f = ir->value.u[i]; 2267 } 2268 break; 2269 case GLSL_TYPE_INT: 2270 gl_type = native_integers ? GL_INT : GL_FLOAT; 2271 for (i = 0; i < ir->type->vector_elements; i++) { 2272 if (native_integers) 2273 values[i].i = ir->value.i[i]; 2274 else 2275 values[i].f = ir->value.i[i]; 2276 } 2277 break; 2278 case GLSL_TYPE_BOOL: 2279 gl_type = native_integers ? GL_BOOL : GL_FLOAT; 2280 for (i = 0; i < ir->type->vector_elements; i++) { 2281 if (native_integers) 2282 values[i].b = ir->value.b[i]; 2283 else 2284 values[i].f = ir->value.b[i]; 2285 } 2286 break; 2287 default: 2288 assert(!"Non-float/uint/int/bool constant"); 2289 } 2290 2291 this->result = st_src_reg(file, -1, ir->type); 2292 this->result.index = add_constant(file, 2293 values, 2294 ir->type->vector_elements, 2295 gl_type, 2296 &this->result.swizzle); 2297} 2298 2299function_entry * 2300glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) 2301{ 2302 function_entry *entry; 2303 2304 foreach_iter(exec_list_iterator, iter, this->function_signatures) { 2305 entry = (function_entry *)iter.get(); 2306 2307 if (entry->sig == sig) 2308 return entry; 2309 } 2310 2311 entry = ralloc(mem_ctx, function_entry); 2312 entry->sig = sig; 2313 entry->sig_id = this->next_signature_id++; 2314 entry->bgn_inst = NULL; 2315 2316 /* Allocate storage for all the parameters. */ 2317 foreach_iter(exec_list_iterator, iter, sig->parameters) { 2318 ir_variable *param = (ir_variable *)iter.get(); 2319 variable_storage *storage; 2320 2321 storage = find_variable_storage(param); 2322 assert(!storage); 2323 2324 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, 2325 this->next_temp); 2326 this->variables.push_tail(storage); 2327 2328 this->next_temp += type_size(param->type); 2329 } 2330 2331 if (!sig->return_type->is_void()) { 2332 entry->return_reg = get_temp(sig->return_type); 2333 } else { 2334 entry->return_reg = undef_src; 2335 } 2336 2337 this->function_signatures.push_tail(entry); 2338 return entry; 2339} 2340 2341void 2342glsl_to_tgsi_visitor::visit(ir_call *ir) 2343{ 2344 glsl_to_tgsi_instruction *call_inst; 2345 ir_function_signature *sig = ir->get_callee(); 2346 function_entry *entry = get_function_signature(sig); 2347 int i; 2348 2349 /* Process in parameters. */ 2350 exec_list_iterator sig_iter = sig->parameters.iterator(); 2351 foreach_iter(exec_list_iterator, iter, *ir) { 2352 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2353 ir_variable *param = (ir_variable *)sig_iter.get(); 2354 2355 if (param->mode == ir_var_in || 2356 param->mode == ir_var_inout) { 2357 variable_storage *storage = find_variable_storage(param); 2358 assert(storage); 2359 2360 param_rval->accept(this); 2361 st_src_reg r = this->result; 2362 2363 st_dst_reg l; 2364 l.file = storage->file; 2365 l.index = storage->index; 2366 l.reladdr = NULL; 2367 l.writemask = WRITEMASK_XYZW; 2368 l.cond_mask = COND_TR; 2369 2370 for (i = 0; i < type_size(param->type); i++) { 2371 emit(ir, TGSI_OPCODE_MOV, l, r); 2372 l.index++; 2373 r.index++; 2374 } 2375 } 2376 2377 sig_iter.next(); 2378 } 2379 assert(!sig_iter.has_next()); 2380 2381 /* Emit call instruction */ 2382 call_inst = emit(ir, TGSI_OPCODE_CAL); 2383 call_inst->function = entry; 2384 2385 /* Process out parameters. */ 2386 sig_iter = sig->parameters.iterator(); 2387 foreach_iter(exec_list_iterator, iter, *ir) { 2388 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2389 ir_variable *param = (ir_variable *)sig_iter.get(); 2390 2391 if (param->mode == ir_var_out || 2392 param->mode == ir_var_inout) { 2393 variable_storage *storage = find_variable_storage(param); 2394 assert(storage); 2395 2396 st_src_reg r; 2397 r.file = storage->file; 2398 r.index = storage->index; 2399 r.reladdr = NULL; 2400 r.swizzle = SWIZZLE_NOOP; 2401 r.negate = 0; 2402 2403 param_rval->accept(this); 2404 st_dst_reg l = st_dst_reg(this->result); 2405 2406 for (i = 0; i < type_size(param->type); i++) { 2407 emit(ir, TGSI_OPCODE_MOV, l, r); 2408 l.index++; 2409 r.index++; 2410 } 2411 } 2412 2413 sig_iter.next(); 2414 } 2415 assert(!sig_iter.has_next()); 2416 2417 /* Process return value. */ 2418 this->result = entry->return_reg; 2419} 2420 2421void 2422glsl_to_tgsi_visitor::visit(ir_texture *ir) 2423{ 2424 st_src_reg result_src, coord, lod_info, projector, dx, dy; 2425 st_dst_reg result_dst, coord_dst; 2426 glsl_to_tgsi_instruction *inst = NULL; 2427 unsigned opcode = TGSI_OPCODE_NOP; 2428 2429 if (ir->coordinate) { 2430 ir->coordinate->accept(this); 2431 2432 /* Put our coords in a temp. We'll need to modify them for shadow, 2433 * projection, or LOD, so the only case we'd use it as is is if 2434 * we're doing plain old texturing. The optimization passes on 2435 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 2436 */ 2437 coord = get_temp(glsl_type::vec4_type); 2438 coord_dst = st_dst_reg(coord); 2439 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2440 } 2441 2442 if (ir->projector) { 2443 ir->projector->accept(this); 2444 projector = this->result; 2445 } 2446 2447 /* Storage for our result. Ideally for an assignment we'd be using 2448 * the actual storage for the result here, instead. 2449 */ 2450 result_src = get_temp(glsl_type::vec4_type); 2451 result_dst = st_dst_reg(result_src); 2452 2453 switch (ir->op) { 2454 case ir_tex: 2455 opcode = TGSI_OPCODE_TEX; 2456 break; 2457 case ir_txb: 2458 opcode = TGSI_OPCODE_TXB; 2459 ir->lod_info.bias->accept(this); 2460 lod_info = this->result; 2461 break; 2462 case ir_txl: 2463 opcode = TGSI_OPCODE_TXL; 2464 ir->lod_info.lod->accept(this); 2465 lod_info = this->result; 2466 break; 2467 case ir_txd: 2468 opcode = TGSI_OPCODE_TXD; 2469 ir->lod_info.grad.dPdx->accept(this); 2470 dx = this->result; 2471 ir->lod_info.grad.dPdy->accept(this); 2472 dy = this->result; 2473 break; 2474 case ir_txs: 2475 opcode = TGSI_OPCODE_TXQ; 2476 ir->lod_info.lod->accept(this); 2477 lod_info = this->result; 2478 break; 2479 case ir_txf: 2480 opcode = TGSI_OPCODE_TXF; 2481 ir->lod_info.lod->accept(this); 2482 lod_info = this->result; 2483 break; 2484 } 2485 2486 if (ir->projector) { 2487 if (opcode == TGSI_OPCODE_TEX) { 2488 /* Slot the projector in as the last component of the coord. */ 2489 coord_dst.writemask = WRITEMASK_W; 2490 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); 2491 coord_dst.writemask = WRITEMASK_XYZW; 2492 opcode = TGSI_OPCODE_TXP; 2493 } else { 2494 st_src_reg coord_w = coord; 2495 coord_w.swizzle = SWIZZLE_WWWW; 2496 2497 /* For the other TEX opcodes there's no projective version 2498 * since the last slot is taken up by LOD info. Do the 2499 * projective divide now. 2500 */ 2501 coord_dst.writemask = WRITEMASK_W; 2502 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); 2503 2504 /* In the case where we have to project the coordinates "by hand," 2505 * the shadow comparator value must also be projected. 2506 */ 2507 st_src_reg tmp_src = coord; 2508 if (ir->shadow_comparitor) { 2509 /* Slot the shadow value in as the second to last component of the 2510 * coord. 2511 */ 2512 ir->shadow_comparitor->accept(this); 2513 2514 tmp_src = get_temp(glsl_type::vec4_type); 2515 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 2516 2517 tmp_dst.writemask = WRITEMASK_Z; 2518 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 2519 2520 tmp_dst.writemask = WRITEMASK_XY; 2521 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 2522 } 2523 2524 coord_dst.writemask = WRITEMASK_XYZ; 2525 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 2526 2527 coord_dst.writemask = WRITEMASK_XYZW; 2528 coord.swizzle = SWIZZLE_XYZW; 2529 } 2530 } 2531 2532 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 2533 * comparator was put in the correct place (and projected) by the code, 2534 * above, that handles by-hand projection. 2535 */ 2536 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 2537 /* Slot the shadow value in as the second to last component of the 2538 * coord. 2539 */ 2540 ir->shadow_comparitor->accept(this); 2541 coord_dst.writemask = WRITEMASK_Z; 2542 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2543 coord_dst.writemask = WRITEMASK_XYZW; 2544 } 2545 2546 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || 2547 opcode == TGSI_OPCODE_TXF) { 2548 /* TGSI stores LOD or LOD bias in the last channel of the coords. */ 2549 coord_dst.writemask = WRITEMASK_W; 2550 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 2551 coord_dst.writemask = WRITEMASK_XYZW; 2552 } 2553 2554 if (opcode == TGSI_OPCODE_TXD) 2555 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2556 else if (opcode == TGSI_OPCODE_TXQ) 2557 inst = emit(ir, opcode, result_dst, lod_info); 2558 else 2559 inst = emit(ir, opcode, result_dst, coord); 2560 2561 if (ir->shadow_comparitor) 2562 inst->tex_shadow = GL_TRUE; 2563 2564 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2565 this->shader_program, 2566 this->prog); 2567 2568 const glsl_type *sampler_type = ir->sampler->type; 2569 2570 switch (sampler_type->sampler_dimensionality) { 2571 case GLSL_SAMPLER_DIM_1D: 2572 inst->tex_target = (sampler_type->sampler_array) 2573 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2574 break; 2575 case GLSL_SAMPLER_DIM_2D: 2576 inst->tex_target = (sampler_type->sampler_array) 2577 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2578 break; 2579 case GLSL_SAMPLER_DIM_3D: 2580 inst->tex_target = TEXTURE_3D_INDEX; 2581 break; 2582 case GLSL_SAMPLER_DIM_CUBE: 2583 inst->tex_target = TEXTURE_CUBE_INDEX; 2584 break; 2585 case GLSL_SAMPLER_DIM_RECT: 2586 inst->tex_target = TEXTURE_RECT_INDEX; 2587 break; 2588 case GLSL_SAMPLER_DIM_BUF: 2589 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2590 break; 2591 default: 2592 assert(!"Should not get here."); 2593 } 2594 2595 this->result = result_src; 2596} 2597 2598void 2599glsl_to_tgsi_visitor::visit(ir_return *ir) 2600{ 2601 if (ir->get_value()) { 2602 st_dst_reg l; 2603 int i; 2604 2605 assert(current_function); 2606 2607 ir->get_value()->accept(this); 2608 st_src_reg r = this->result; 2609 2610 l = st_dst_reg(current_function->return_reg); 2611 2612 for (i = 0; i < type_size(current_function->sig->return_type); i++) { 2613 emit(ir, TGSI_OPCODE_MOV, l, r); 2614 l.index++; 2615 r.index++; 2616 } 2617 } 2618 2619 emit(ir, TGSI_OPCODE_RET); 2620} 2621 2622void 2623glsl_to_tgsi_visitor::visit(ir_discard *ir) 2624{ 2625 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 2626 2627 if (ir->condition) { 2628 ir->condition->accept(this); 2629 this->result.negate = ~this->result.negate; 2630 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); 2631 } else { 2632 emit(ir, TGSI_OPCODE_KILP); 2633 } 2634 2635 fp->UsesKill = GL_TRUE; 2636} 2637 2638void 2639glsl_to_tgsi_visitor::visit(ir_if *ir) 2640{ 2641 glsl_to_tgsi_instruction *cond_inst, *if_inst; 2642 glsl_to_tgsi_instruction *prev_inst; 2643 2644 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2645 2646 ir->condition->accept(this); 2647 assert(this->result.file != PROGRAM_UNDEFINED); 2648 2649 if (this->options->EmitCondCodes) { 2650 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2651 2652 /* See if we actually generated any instruction for generating 2653 * the condition. If not, then cook up a move to a temp so we 2654 * have something to set cond_update on. 2655 */ 2656 if (cond_inst == prev_inst) { 2657 st_src_reg temp = get_temp(glsl_type::bool_type); 2658 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); 2659 } 2660 cond_inst->cond_update = GL_TRUE; 2661 2662 if_inst = emit(ir->condition, TGSI_OPCODE_IF); 2663 if_inst->dst.cond_mask = COND_NE; 2664 } else { 2665 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); 2666 } 2667 2668 this->instructions.push_tail(if_inst); 2669 2670 visit_exec_list(&ir->then_instructions, this); 2671 2672 if (!ir->else_instructions.is_empty()) { 2673 emit(ir->condition, TGSI_OPCODE_ELSE); 2674 visit_exec_list(&ir->else_instructions, this); 2675 } 2676 2677 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); 2678} 2679 2680glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 2681{ 2682 result.file = PROGRAM_UNDEFINED; 2683 next_temp = 1; 2684 next_signature_id = 1; 2685 num_immediates = 0; 2686 current_function = NULL; 2687 num_address_regs = 0; 2688 indirect_addr_temps = false; 2689 indirect_addr_consts = false; 2690 mem_ctx = ralloc_context(NULL); 2691} 2692 2693glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 2694{ 2695 ralloc_free(mem_ctx); 2696} 2697 2698extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 2699{ 2700 delete v; 2701} 2702 2703 2704/** 2705 * Count resources used by the given gpu program (number of texture 2706 * samplers, etc). 2707 */ 2708static void 2709count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2710{ 2711 v->samplers_used = 0; 2712 2713 foreach_iter(exec_list_iterator, iter, v->instructions) { 2714 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2715 2716 if (is_tex_instruction(inst->op)) { 2717 v->samplers_used |= 1 << inst->sampler; 2718 2719 prog->SamplerTargets[inst->sampler] = 2720 (gl_texture_index)inst->tex_target; 2721 if (inst->tex_shadow) { 2722 prog->ShadowSamplers |= 1 << inst->sampler; 2723 } 2724 } 2725 } 2726 2727 prog->SamplersUsed = v->samplers_used; 2728 _mesa_update_shader_textures_used(prog); 2729} 2730 2731 2732/** 2733 * Check if the given vertex/fragment/shader program is within the 2734 * resource limits of the context (number of texture units, etc). 2735 * If any of those checks fail, record a linker error. 2736 * 2737 * XXX more checks are needed... 2738 */ 2739static void 2740check_resources(const struct gl_context *ctx, 2741 struct gl_shader_program *shader_program, 2742 glsl_to_tgsi_visitor *prog, 2743 struct gl_program *proginfo) 2744{ 2745 switch (proginfo->Target) { 2746 case GL_VERTEX_PROGRAM_ARB: 2747 if (_mesa_bitcount(prog->samplers_used) > 2748 ctx->Const.MaxVertexTextureImageUnits) { 2749 fail_link(shader_program, "Too many vertex shader texture samplers"); 2750 } 2751 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { 2752 fail_link(shader_program, "Too many vertex shader constants"); 2753 } 2754 break; 2755 case MESA_GEOMETRY_PROGRAM: 2756 if (_mesa_bitcount(prog->samplers_used) > 2757 ctx->Const.MaxGeometryTextureImageUnits) { 2758 fail_link(shader_program, "Too many geometry shader texture samplers"); 2759 } 2760 if (proginfo->Parameters->NumParameters > 2761 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { 2762 fail_link(shader_program, "Too many geometry shader constants"); 2763 } 2764 break; 2765 case GL_FRAGMENT_PROGRAM_ARB: 2766 if (_mesa_bitcount(prog->samplers_used) > 2767 ctx->Const.MaxTextureImageUnits) { 2768 fail_link(shader_program, "Too many fragment shader texture samplers"); 2769 } 2770 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { 2771 fail_link(shader_program, "Too many fragment shader constants"); 2772 } 2773 break; 2774 default: 2775 _mesa_problem(ctx, "unexpected program type in check_resources()"); 2776 } 2777} 2778 2779 2780 2781struct uniform_sort { 2782 struct gl_uniform *u; 2783 int pos; 2784}; 2785 2786/* The shader_program->Uniforms list is almost sorted in increasing 2787 * uniform->{Frag,Vert}Pos locations, but not quite when there are 2788 * uniforms shared between targets. We need to add parameters in 2789 * increasing order for the targets. 2790 */ 2791static int 2792sort_uniforms(const void *a, const void *b) 2793{ 2794 struct uniform_sort *u1 = (struct uniform_sort *)a; 2795 struct uniform_sort *u2 = (struct uniform_sort *)b; 2796 2797 return u1->pos - u2->pos; 2798} 2799 2800/* Add the uniforms to the parameters. The linker chose locations 2801 * in our parameters lists (which weren't created yet), which the 2802 * uniforms code will use to poke values into our parameters list 2803 * when uniforms are updated. 2804 */ 2805static void 2806add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, 2807 struct gl_shader *shader, 2808 struct gl_program *prog) 2809{ 2810 unsigned int i; 2811 unsigned int next_sampler = 0, num_uniforms = 0; 2812 struct uniform_sort *sorted_uniforms; 2813 2814 sorted_uniforms = ralloc_array(NULL, struct uniform_sort, 2815 shader_program->Uniforms->NumUniforms); 2816 2817 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { 2818 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; 2819 int parameter_index = -1; 2820 2821 switch (shader->Type) { 2822 case GL_VERTEX_SHADER: 2823 parameter_index = uniform->VertPos; 2824 break; 2825 case GL_FRAGMENT_SHADER: 2826 parameter_index = uniform->FragPos; 2827 break; 2828 case GL_GEOMETRY_SHADER: 2829 parameter_index = uniform->GeomPos; 2830 break; 2831 } 2832 2833 /* Only add uniforms used in our target. */ 2834 if (parameter_index != -1) { 2835 sorted_uniforms[num_uniforms].pos = parameter_index; 2836 sorted_uniforms[num_uniforms].u = uniform; 2837 num_uniforms++; 2838 } 2839 } 2840 2841 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), 2842 sort_uniforms); 2843 2844 for (i = 0; i < num_uniforms; i++) { 2845 struct gl_uniform *uniform = sorted_uniforms[i].u; 2846 int parameter_index = sorted_uniforms[i].pos; 2847 const glsl_type *type = uniform->Type; 2848 unsigned int size; 2849 2850 if (type->is_vector() || 2851 type->is_scalar()) { 2852 size = type->vector_elements; 2853 } else { 2854 size = type_size(type) * 4; 2855 } 2856 2857 gl_register_file file; 2858 if (type->is_sampler() || 2859 (type->is_array() && type->fields.array->is_sampler())) { 2860 file = PROGRAM_SAMPLER; 2861 } else { 2862 file = PROGRAM_UNIFORM; 2863 } 2864 2865 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, 2866 uniform->Name); 2867 2868 if (index < 0) { 2869 index = _mesa_add_parameter(prog->Parameters, file, 2870 uniform->Name, size, type->gl_type, 2871 NULL, NULL, 0x0); 2872 2873 /* Sampler uniform values are stored in prog->SamplerUnits, 2874 * and the entry in that array is selected by this index we 2875 * store in ParameterValues[]. 2876 */ 2877 if (file == PROGRAM_SAMPLER) { 2878 for (unsigned int j = 0; j < size / 4; j++) 2879 prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; 2880 } 2881 2882 /* The location chosen in the Parameters list here (returned 2883 * from _mesa_add_uniform) has to match what the linker chose. 2884 */ 2885 if (index != parameter_index) { 2886 fail_link(shader_program, "Allocation of uniform `%s' to target " 2887 "failed (%d vs %d)\n", 2888 uniform->Name, index, parameter_index); 2889 } 2890 } 2891 } 2892 2893 ralloc_free(sorted_uniforms); 2894} 2895 2896static void 2897set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 2898 struct gl_shader_program *shader_program, 2899 const char *name, const glsl_type *type, 2900 ir_constant *val) 2901{ 2902 if (type->is_record()) { 2903 ir_constant *field_constant; 2904 2905 field_constant = (ir_constant *)val->components.get_head(); 2906 2907 for (unsigned int i = 0; i < type->length; i++) { 2908 const glsl_type *field_type = type->fields.structure[i].type; 2909 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 2910 type->fields.structure[i].name); 2911 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 2912 field_type, field_constant); 2913 field_constant = (ir_constant *)field_constant->next; 2914 } 2915 return; 2916 } 2917 2918 int loc = _mesa_get_uniform_location(ctx, shader_program, name); 2919 2920 if (loc == -1) { 2921 fail_link(shader_program, 2922 "Couldn't find uniform for initializer %s\n", name); 2923 return; 2924 } 2925 2926 for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) { 2927 ir_constant *element; 2928 const glsl_type *element_type; 2929 if (type->is_array()) { 2930 element = val->array_elements[i]; 2931 element_type = type->fields.array; 2932 } else { 2933 element = val; 2934 element_type = type; 2935 } 2936 2937 void *values; 2938 2939 if (element_type->base_type == GLSL_TYPE_BOOL) { 2940 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 2941 for (unsigned int j = 0; j < element_type->components(); j++) { 2942 conv[j] = element->value.b[j]; 2943 } 2944 values = (void *)conv; 2945 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 2946 element_type->vector_elements, 2947 1); 2948 } else { 2949 values = &element->value; 2950 } 2951 2952 if (element_type->is_matrix()) { 2953 _mesa_uniform_matrix(ctx, shader_program, 2954 element_type->matrix_columns, 2955 element_type->vector_elements, 2956 loc, 1, GL_FALSE, (GLfloat *)values); 2957 loc += element_type->matrix_columns; 2958 } else { 2959 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 2960 values, element_type->gl_type); 2961 loc += type_size(element_type); 2962 } 2963 } 2964} 2965 2966/* 2967 * Scan/rewrite program to remove reads of custom (output) registers. 2968 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING 2969 * (for vertex shaders). 2970 * In GLSL shaders, varying vars can be read and written. 2971 * On some hardware, trying to read an output register causes trouble. 2972 * So, rewrite the program to use a temporary register in this case. 2973 * 2974 * Based on _mesa_remove_output_reads from programopt.c. 2975 */ 2976void 2977glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) 2978{ 2979 GLuint i; 2980 GLint outputMap[VERT_RESULT_MAX]; 2981 GLint outputTypes[VERT_RESULT_MAX]; 2982 GLuint numVaryingReads = 0; 2983 GLboolean usedTemps[MAX_TEMPS]; 2984 GLuint firstTemp = 0; 2985 2986 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, 2987 usedTemps, MAX_TEMPS); 2988 2989 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); 2990 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); 2991 2992 for (i = 0; i < VERT_RESULT_MAX; i++) 2993 outputMap[i] = -1; 2994 2995 /* look for instructions which read from varying vars */ 2996 foreach_iter(exec_list_iterator, iter, this->instructions) { 2997 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2998 const GLuint numSrc = num_inst_src_regs(inst->op); 2999 GLuint j; 3000 for (j = 0; j < numSrc; j++) { 3001 if (inst->src[j].file == type) { 3002 /* replace the read with a temp reg */ 3003 const GLuint var = inst->src[j].index; 3004 if (outputMap[var] == -1) { 3005 numVaryingReads++; 3006 outputMap[var] = _mesa_find_free_register(usedTemps, 3007 MAX_TEMPS, 3008 firstTemp); 3009 outputTypes[var] = inst->src[j].type; 3010 firstTemp = outputMap[var] + 1; 3011 } 3012 inst->src[j].file = PROGRAM_TEMPORARY; 3013 inst->src[j].index = outputMap[var]; 3014 } 3015 } 3016 } 3017 3018 if (numVaryingReads == 0) 3019 return; /* nothing to be done */ 3020 3021 /* look for instructions which write to the varying vars identified above */ 3022 foreach_iter(exec_list_iterator, iter, this->instructions) { 3023 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3024 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { 3025 /* change inst to write to the temp reg, instead of the varying */ 3026 inst->dst.file = PROGRAM_TEMPORARY; 3027 inst->dst.index = outputMap[inst->dst.index]; 3028 } 3029 } 3030 3031 /* insert new MOV instructions at the end */ 3032 for (i = 0; i < VERT_RESULT_MAX; i++) { 3033 if (outputMap[i] >= 0) { 3034 /* MOV VAR[i], TEMP[tmp]; */ 3035 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); 3036 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); 3037 dst.index = i; 3038 this->emit(NULL, TGSI_OPCODE_MOV, dst, src); 3039 } 3040 } 3041} 3042 3043/** 3044 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 3045 * are read from the given src in this instruction 3046 */ 3047static int 3048get_src_arg_mask(st_dst_reg dst, st_src_reg src) 3049{ 3050 int read_mask = 0, comp; 3051 3052 /* Now, given the src swizzle and the written channels, find which 3053 * components are actually read 3054 */ 3055 for (comp = 0; comp < 4; ++comp) { 3056 const unsigned coord = GET_SWZ(src.swizzle, comp); 3057 ASSERT(coord < 4); 3058 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 3059 read_mask |= 1 << coord; 3060 } 3061 3062 return read_mask; 3063} 3064 3065/** 3066 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 3067 * instruction is the first instruction to write to register T0. There are 3068 * several lowering passes done in GLSL IR (e.g. branches and 3069 * relative addressing) that create a large number of conditional assignments 3070 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 3071 * 3072 * Here is why this conversion is safe: 3073 * CMP T0, T1 T2 T0 can be expanded to: 3074 * if (T1 < 0.0) 3075 * MOV T0, T2; 3076 * else 3077 * MOV T0, T0; 3078 * 3079 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 3080 * as the original program. If (T1 < 0.0) evaluates to false, executing 3081 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 3082 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 3083 * because any instruction that was going to read from T0 after this was going 3084 * to read a garbage value anyway. 3085 */ 3086void 3087glsl_to_tgsi_visitor::simplify_cmp(void) 3088{ 3089 unsigned tempWrites[MAX_TEMPS]; 3090 unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; 3091 3092 memset(tempWrites, 0, sizeof(tempWrites)); 3093 memset(outputWrites, 0, sizeof(outputWrites)); 3094 3095 foreach_iter(exec_list_iterator, iter, this->instructions) { 3096 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3097 unsigned prevWriteMask = 0; 3098 3099 /* Give up if we encounter relative addressing or flow control. */ 3100 if (inst->dst.reladdr || 3101 tgsi_get_opcode_info(inst->op)->is_branch || 3102 inst->op == TGSI_OPCODE_BGNSUB || 3103 inst->op == TGSI_OPCODE_CONT || 3104 inst->op == TGSI_OPCODE_END || 3105 inst->op == TGSI_OPCODE_ENDSUB || 3106 inst->op == TGSI_OPCODE_RET) { 3107 return; 3108 } 3109 3110 if (inst->dst.file == PROGRAM_OUTPUT) { 3111 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); 3112 prevWriteMask = outputWrites[inst->dst.index]; 3113 outputWrites[inst->dst.index] |= inst->dst.writemask; 3114 } else if (inst->dst.file == PROGRAM_TEMPORARY) { 3115 assert(inst->dst.index < MAX_TEMPS); 3116 prevWriteMask = tempWrites[inst->dst.index]; 3117 tempWrites[inst->dst.index] |= inst->dst.writemask; 3118 } 3119 3120 /* For a CMP to be considered a conditional write, the destination 3121 * register and source register two must be the same. */ 3122 if (inst->op == TGSI_OPCODE_CMP 3123 && !(inst->dst.writemask & prevWriteMask) 3124 && inst->src[2].file == inst->dst.file 3125 && inst->src[2].index == inst->dst.index 3126 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { 3127 3128 inst->op = TGSI_OPCODE_MOV; 3129 inst->src[0] = inst->src[1]; 3130 } 3131 } 3132} 3133 3134/* Replaces all references to a temporary register index with another index. */ 3135void 3136glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) 3137{ 3138 foreach_iter(exec_list_iterator, iter, this->instructions) { 3139 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3140 unsigned j; 3141 3142 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3143 if (inst->src[j].file == PROGRAM_TEMPORARY && 3144 inst->src[j].index == index) { 3145 inst->src[j].index = new_index; 3146 } 3147 } 3148 3149 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3150 inst->dst.index = new_index; 3151 } 3152 } 3153} 3154 3155int 3156glsl_to_tgsi_visitor::get_first_temp_read(int index) 3157{ 3158 int depth = 0; /* loop depth */ 3159 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3160 unsigned i = 0, j; 3161 3162 foreach_iter(exec_list_iterator, iter, this->instructions) { 3163 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3164 3165 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3166 if (inst->src[j].file == PROGRAM_TEMPORARY && 3167 inst->src[j].index == index) { 3168 return (depth == 0) ? i : loop_start; 3169 } 3170 } 3171 3172 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3173 if(depth++ == 0) 3174 loop_start = i; 3175 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3176 if (--depth == 0) 3177 loop_start = -1; 3178 } 3179 assert(depth >= 0); 3180 3181 i++; 3182 } 3183 3184 return -1; 3185} 3186 3187int 3188glsl_to_tgsi_visitor::get_first_temp_write(int index) 3189{ 3190 int depth = 0; /* loop depth */ 3191 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3192 int i = 0; 3193 3194 foreach_iter(exec_list_iterator, iter, this->instructions) { 3195 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3196 3197 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3198 return (depth == 0) ? i : loop_start; 3199 } 3200 3201 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3202 if(depth++ == 0) 3203 loop_start = i; 3204 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3205 if (--depth == 0) 3206 loop_start = -1; 3207 } 3208 assert(depth >= 0); 3209 3210 i++; 3211 } 3212 3213 return -1; 3214} 3215 3216int 3217glsl_to_tgsi_visitor::get_last_temp_read(int index) 3218{ 3219 int depth = 0; /* loop depth */ 3220 int last = -1; /* index of last instruction that reads the temporary */ 3221 unsigned i = 0, j; 3222 3223 foreach_iter(exec_list_iterator, iter, this->instructions) { 3224 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3225 3226 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3227 if (inst->src[j].file == PROGRAM_TEMPORARY && 3228 inst->src[j].index == index) { 3229 last = (depth == 0) ? i : -2; 3230 } 3231 } 3232 3233 if (inst->op == TGSI_OPCODE_BGNLOOP) 3234 depth++; 3235 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3236 if (--depth == 0 && last == -2) 3237 last = i; 3238 assert(depth >= 0); 3239 3240 i++; 3241 } 3242 3243 assert(last >= -1); 3244 return last; 3245} 3246 3247int 3248glsl_to_tgsi_visitor::get_last_temp_write(int index) 3249{ 3250 int depth = 0; /* loop depth */ 3251 int last = -1; /* index of last instruction that writes to the temporary */ 3252 int i = 0; 3253 3254 foreach_iter(exec_list_iterator, iter, this->instructions) { 3255 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3256 3257 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) 3258 last = (depth == 0) ? i : -2; 3259 3260 if (inst->op == TGSI_OPCODE_BGNLOOP) 3261 depth++; 3262 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3263 if (--depth == 0 && last == -2) 3264 last = i; 3265 assert(depth >= 0); 3266 3267 i++; 3268 } 3269 3270 assert(last >= -1); 3271 return last; 3272} 3273 3274/* 3275 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 3276 * channels for copy propagation and updates following instructions to 3277 * use the original versions. 3278 * 3279 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3280 * will occur. As an example, a TXP production before this pass: 3281 * 3282 * 0: MOV TEMP[1], INPUT[4].xyyy; 3283 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3284 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 3285 * 3286 * and after: 3287 * 3288 * 0: MOV TEMP[1], INPUT[4].xyyy; 3289 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3290 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3291 * 3292 * which allows for dead code elimination on TEMP[1]'s writes. 3293 */ 3294void 3295glsl_to_tgsi_visitor::copy_propagate(void) 3296{ 3297 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 3298 glsl_to_tgsi_instruction *, 3299 this->next_temp * 4); 3300 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3301 int level = 0; 3302 3303 foreach_iter(exec_list_iterator, iter, this->instructions) { 3304 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3305 3306 assert(inst->dst.file != PROGRAM_TEMPORARY 3307 || inst->dst.index < this->next_temp); 3308 3309 /* First, do any copy propagation possible into the src regs. */ 3310 for (int r = 0; r < 3; r++) { 3311 glsl_to_tgsi_instruction *first = NULL; 3312 bool good = true; 3313 int acp_base = inst->src[r].index * 4; 3314 3315 if (inst->src[r].file != PROGRAM_TEMPORARY || 3316 inst->src[r].reladdr) 3317 continue; 3318 3319 /* See if we can find entries in the ACP consisting of MOVs 3320 * from the same src register for all the swizzled channels 3321 * of this src register reference. 3322 */ 3323 for (int i = 0; i < 4; i++) { 3324 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3325 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 3326 3327 if (!copy_chan) { 3328 good = false; 3329 break; 3330 } 3331 3332 assert(acp_level[acp_base + src_chan] <= level); 3333 3334 if (!first) { 3335 first = copy_chan; 3336 } else { 3337 if (first->src[0].file != copy_chan->src[0].file || 3338 first->src[0].index != copy_chan->src[0].index) { 3339 good = false; 3340 break; 3341 } 3342 } 3343 } 3344 3345 if (good) { 3346 /* We've now validated that we can copy-propagate to 3347 * replace this src register reference. Do it. 3348 */ 3349 inst->src[r].file = first->src[0].file; 3350 inst->src[r].index = first->src[0].index; 3351 3352 int swizzle = 0; 3353 for (int i = 0; i < 4; i++) { 3354 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3355 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 3356 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 3357 (3 * i)); 3358 } 3359 inst->src[r].swizzle = swizzle; 3360 } 3361 } 3362 3363 switch (inst->op) { 3364 case TGSI_OPCODE_BGNLOOP: 3365 case TGSI_OPCODE_ENDLOOP: 3366 /* End of a basic block, clear the ACP entirely. */ 3367 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3368 break; 3369 3370 case TGSI_OPCODE_IF: 3371 ++level; 3372 break; 3373 3374 case TGSI_OPCODE_ENDIF: 3375 case TGSI_OPCODE_ELSE: 3376 /* Clear all channels written inside the block from the ACP, but 3377 * leaving those that were not touched. 3378 */ 3379 for (int r = 0; r < this->next_temp; r++) { 3380 for (int c = 0; c < 4; c++) { 3381 if (!acp[4 * r + c]) 3382 continue; 3383 3384 if (acp_level[4 * r + c] >= level) 3385 acp[4 * r + c] = NULL; 3386 } 3387 } 3388 if (inst->op == TGSI_OPCODE_ENDIF) 3389 --level; 3390 break; 3391 3392 default: 3393 /* Continuing the block, clear any written channels from 3394 * the ACP. 3395 */ 3396 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 3397 /* Any temporary might be written, so no copy propagation 3398 * across this instruction. 3399 */ 3400 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3401 } else if (inst->dst.file == PROGRAM_OUTPUT && 3402 inst->dst.reladdr) { 3403 /* Any output might be written, so no copy propagation 3404 * from outputs across this instruction. 3405 */ 3406 for (int r = 0; r < this->next_temp; r++) { 3407 for (int c = 0; c < 4; c++) { 3408 if (!acp[4 * r + c]) 3409 continue; 3410 3411 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 3412 acp[4 * r + c] = NULL; 3413 } 3414 } 3415 } else if (inst->dst.file == PROGRAM_TEMPORARY || 3416 inst->dst.file == PROGRAM_OUTPUT) { 3417 /* Clear where it's used as dst. */ 3418 if (inst->dst.file == PROGRAM_TEMPORARY) { 3419 for (int c = 0; c < 4; c++) { 3420 if (inst->dst.writemask & (1 << c)) { 3421 acp[4 * inst->dst.index + c] = NULL; 3422 } 3423 } 3424 } 3425 3426 /* Clear where it's used as src. */ 3427 for (int r = 0; r < this->next_temp; r++) { 3428 for (int c = 0; c < 4; c++) { 3429 if (!acp[4 * r + c]) 3430 continue; 3431 3432 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 3433 3434 if (acp[4 * r + c]->src[0].file == inst->dst.file && 3435 acp[4 * r + c]->src[0].index == inst->dst.index && 3436 inst->dst.writemask & (1 << src_chan)) 3437 { 3438 acp[4 * r + c] = NULL; 3439 } 3440 } 3441 } 3442 } 3443 break; 3444 } 3445 3446 /* If this is a copy, add it to the ACP. */ 3447 if (inst->op == TGSI_OPCODE_MOV && 3448 inst->dst.file == PROGRAM_TEMPORARY && 3449 !inst->dst.reladdr && 3450 !inst->saturate && 3451 !inst->src[0].reladdr && 3452 !inst->src[0].negate) { 3453 for (int i = 0; i < 4; i++) { 3454 if (inst->dst.writemask & (1 << i)) { 3455 acp[4 * inst->dst.index + i] = inst; 3456 acp_level[4 * inst->dst.index + i] = level; 3457 } 3458 } 3459 } 3460 } 3461 3462 ralloc_free(acp_level); 3463 ralloc_free(acp); 3464} 3465 3466/* 3467 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. 3468 * 3469 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3470 * will occur. As an example, a TXP production after copy propagation but 3471 * before this pass: 3472 * 3473 * 0: MOV TEMP[1], INPUT[4].xyyy; 3474 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3475 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3476 * 3477 * and after this pass: 3478 * 3479 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3480 * 3481 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) 3482 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them 3483 */ 3484void 3485glsl_to_tgsi_visitor::eliminate_dead_code(void) 3486{ 3487 int i; 3488 3489 for (i=0; i < this->next_temp; i++) { 3490 int last_read = get_last_temp_read(i); 3491 int j = 0; 3492 3493 foreach_iter(exec_list_iterator, iter, this->instructions) { 3494 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3495 3496 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && 3497 j > last_read) 3498 { 3499 iter.remove(); 3500 delete inst; 3501 } 3502 3503 j++; 3504 } 3505 } 3506} 3507 3508/* 3509 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 3510 * code elimination. This is less primitive than eliminate_dead_code(), as it 3511 * is per-channel and can detect consecutive writes without a read between them 3512 * as dead code. However, there is some dead code that can be eliminated by 3513 * eliminate_dead_code() but not this function - for example, this function 3514 * cannot eliminate an instruction writing to a register that is never read and 3515 * is the only instruction writing to that register. 3516 * 3517 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3518 * will occur. 3519 */ 3520int 3521glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) 3522{ 3523 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, 3524 glsl_to_tgsi_instruction *, 3525 this->next_temp * 4); 3526 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3527 int level = 0; 3528 int removed = 0; 3529 3530 foreach_iter(exec_list_iterator, iter, this->instructions) { 3531 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3532 3533 assert(inst->dst.file != PROGRAM_TEMPORARY 3534 || inst->dst.index < this->next_temp); 3535 3536 switch (inst->op) { 3537 case TGSI_OPCODE_BGNLOOP: 3538 case TGSI_OPCODE_ENDLOOP: 3539 /* End of a basic block, clear the write array entirely. 3540 * FIXME: This keeps us from killing dead code when the writes are 3541 * on either side of a loop, even when the register isn't touched 3542 * inside the loop. 3543 */ 3544 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3545 break; 3546 3547 case TGSI_OPCODE_ENDIF: 3548 --level; 3549 break; 3550 3551 case TGSI_OPCODE_ELSE: 3552 /* Clear all channels written inside the preceding if block from the 3553 * write array, but leave those that were not touched. 3554 * 3555 * FIXME: This destroys opportunities to remove dead code inside of 3556 * IF blocks that are followed by an ELSE block. 3557 */ 3558 for (int r = 0; r < this->next_temp; r++) { 3559 for (int c = 0; c < 4; c++) { 3560 if (!writes[4 * r + c]) 3561 continue; 3562 3563 if (write_level[4 * r + c] >= level) 3564 writes[4 * r + c] = NULL; 3565 } 3566 } 3567 break; 3568 3569 case TGSI_OPCODE_IF: 3570 ++level; 3571 /* fallthrough to default case to mark the condition as read */ 3572 3573 default: 3574 /* Continuing the block, clear any channels from the write array that 3575 * are read by this instruction. 3576 */ 3577 for (unsigned i = 0; i < Elements(inst->src); i++) { 3578 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ 3579 /* Any temporary might be read, so no dead code elimination 3580 * across this instruction. 3581 */ 3582 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3583 } else if (inst->src[i].file == PROGRAM_TEMPORARY) { 3584 /* Clear where it's used as src. */ 3585 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); 3586 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); 3587 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); 3588 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); 3589 3590 for (int c = 0; c < 4; c++) { 3591 if (src_chans & (1 << c)) { 3592 writes[4 * inst->src[i].index + c] = NULL; 3593 } 3594 } 3595 } 3596 } 3597 break; 3598 } 3599 3600 /* If this instruction writes to a temporary, add it to the write array. 3601 * If there is already an instruction in the write array for one or more 3602 * of the channels, flag that channel write as dead. 3603 */ 3604 if (inst->dst.file == PROGRAM_TEMPORARY && 3605 !inst->dst.reladdr && 3606 !inst->saturate) { 3607 for (int c = 0; c < 4; c++) { 3608 if (inst->dst.writemask & (1 << c)) { 3609 if (writes[4 * inst->dst.index + c]) { 3610 if (write_level[4 * inst->dst.index + c] < level) 3611 continue; 3612 else 3613 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); 3614 } 3615 writes[4 * inst->dst.index + c] = inst; 3616 write_level[4 * inst->dst.index + c] = level; 3617 } 3618 } 3619 } 3620 } 3621 3622 /* Anything still in the write array at this point is dead code. */ 3623 for (int r = 0; r < this->next_temp; r++) { 3624 for (int c = 0; c < 4; c++) { 3625 glsl_to_tgsi_instruction *inst = writes[4 * r + c]; 3626 if (inst) 3627 inst->dead_mask |= (1 << c); 3628 } 3629 } 3630 3631 /* Now actually remove the instructions that are completely dead and update 3632 * the writemask of other instructions with dead channels. 3633 */ 3634 foreach_iter(exec_list_iterator, iter, this->instructions) { 3635 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3636 3637 if (!inst->dead_mask || !inst->dst.writemask) 3638 continue; 3639 else if (inst->dead_mask == inst->dst.writemask) { 3640 iter.remove(); 3641 delete inst; 3642 removed++; 3643 } else 3644 inst->dst.writemask &= ~(inst->dead_mask); 3645 } 3646 3647 ralloc_free(write_level); 3648 ralloc_free(writes); 3649 3650 return removed; 3651} 3652 3653/* Merges temporary registers together where possible to reduce the number of 3654 * registers needed to run a program. 3655 * 3656 * Produces optimal code only after copy propagation and dead code elimination 3657 * have been run. */ 3658void 3659glsl_to_tgsi_visitor::merge_registers(void) 3660{ 3661 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); 3662 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); 3663 int i, j; 3664 3665 /* Read the indices of the last read and first write to each temp register 3666 * into an array so that we don't have to traverse the instruction list as 3667 * much. */ 3668 for (i=0; i < this->next_temp; i++) { 3669 last_reads[i] = get_last_temp_read(i); 3670 first_writes[i] = get_first_temp_write(i); 3671 } 3672 3673 /* Start looking for registers with non-overlapping usages that can be 3674 * merged together. */ 3675 for (i=0; i < this->next_temp; i++) { 3676 /* Don't touch unused registers. */ 3677 if (last_reads[i] < 0 || first_writes[i] < 0) continue; 3678 3679 for (j=0; j < this->next_temp; j++) { 3680 /* Don't touch unused registers. */ 3681 if (last_reads[j] < 0 || first_writes[j] < 0) continue; 3682 3683 /* We can merge the two registers if the first write to j is after or 3684 * in the same instruction as the last read from i. Note that the 3685 * register at index i will always be used earlier or at the same time 3686 * as the register at index j. */ 3687 if (first_writes[i] <= first_writes[j] && 3688 last_reads[i] <= first_writes[j]) 3689 { 3690 rename_temp_register(j, i); /* Replace all references to j with i.*/ 3691 3692 /* Update the first_writes and last_reads arrays with the new 3693 * values for the merged register index, and mark the newly unused 3694 * register index as such. */ 3695 last_reads[i] = last_reads[j]; 3696 first_writes[j] = -1; 3697 last_reads[j] = -1; 3698 } 3699 } 3700 } 3701 3702 ralloc_free(last_reads); 3703 ralloc_free(first_writes); 3704} 3705 3706/* Reassign indices to temporary registers by reusing unused indices created 3707 * by optimization passes. */ 3708void 3709glsl_to_tgsi_visitor::renumber_registers(void) 3710{ 3711 int i = 0; 3712 int new_index = 0; 3713 3714 for (i=0; i < this->next_temp; i++) { 3715 if (get_first_temp_read(i) < 0) continue; 3716 if (i != new_index) 3717 rename_temp_register(i, new_index); 3718 new_index++; 3719 } 3720 3721 this->next_temp = new_index; 3722} 3723 3724/** 3725 * Returns a fragment program which implements the current pixel transfer ops. 3726 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. 3727 */ 3728extern "C" void 3729get_pixel_transfer_visitor(struct st_fragment_program *fp, 3730 glsl_to_tgsi_visitor *original, 3731 int scale_and_bias, int pixel_maps) 3732{ 3733 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3734 struct st_context *st = st_context(original->ctx); 3735 struct gl_program *prog = &fp->Base.Base; 3736 struct gl_program_parameter_list *params = _mesa_new_parameter_list(); 3737 st_src_reg coord, src0; 3738 st_dst_reg dst0; 3739 glsl_to_tgsi_instruction *inst; 3740 3741 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ 3742 v->ctx = original->ctx; 3743 v->prog = prog; 3744 v->glsl_version = original->glsl_version; 3745 v->native_integers = original->native_integers; 3746 v->options = original->options; 3747 v->next_temp = original->next_temp; 3748 v->num_address_regs = original->num_address_regs; 3749 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3750 v->indirect_addr_temps = original->indirect_addr_temps; 3751 v->indirect_addr_consts = original->indirect_addr_consts; 3752 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3753 3754 /* 3755 * Get initial pixel color from the texture. 3756 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; 3757 */ 3758 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3759 src0 = v->get_temp(glsl_type::vec4_type); 3760 dst0 = st_dst_reg(src0); 3761 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3762 inst->sampler = 0; 3763 inst->tex_target = TEXTURE_2D_INDEX; 3764 3765 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); 3766 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ 3767 v->samplers_used |= (1 << 0); 3768 3769 if (scale_and_bias) { 3770 static const gl_state_index scale_state[STATE_LENGTH] = 3771 { STATE_INTERNAL, STATE_PT_SCALE, 3772 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3773 static const gl_state_index bias_state[STATE_LENGTH] = 3774 { STATE_INTERNAL, STATE_PT_BIAS, 3775 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3776 GLint scale_p, bias_p; 3777 st_src_reg scale, bias; 3778 3779 scale_p = _mesa_add_state_reference(params, scale_state); 3780 bias_p = _mesa_add_state_reference(params, bias_state); 3781 3782 /* MAD colorTemp, colorTemp, scale, bias; */ 3783 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); 3784 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); 3785 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); 3786 } 3787 3788 if (pixel_maps) { 3789 st_src_reg temp = v->get_temp(glsl_type::vec4_type); 3790 st_dst_reg temp_dst = st_dst_reg(temp); 3791 3792 assert(st->pixel_xfer.pixelmap_texture); 3793 3794 /* With a little effort, we can do four pixel map look-ups with 3795 * two TEX instructions: 3796 */ 3797 3798 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ 3799 temp_dst.writemask = WRITEMASK_XY; /* write R,G */ 3800 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3801 inst->sampler = 1; 3802 inst->tex_target = TEXTURE_2D_INDEX; 3803 3804 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ 3805 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); 3806 temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ 3807 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3808 inst->sampler = 1; 3809 inst->tex_target = TEXTURE_2D_INDEX; 3810 3811 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ 3812 v->samplers_used |= (1 << 1); 3813 3814 /* MOV colorTemp, temp; */ 3815 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); 3816 } 3817 3818 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3819 * new visitor. */ 3820 foreach_iter(exec_list_iterator, iter, original->instructions) { 3821 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3822 st_src_reg src_regs[3]; 3823 3824 if (inst->dst.file == PROGRAM_OUTPUT) 3825 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3826 3827 for (int i=0; i<3; i++) { 3828 src_regs[i] = inst->src[i]; 3829 if (src_regs[i].file == PROGRAM_INPUT && 3830 src_regs[i].index == FRAG_ATTRIB_COL0) 3831 { 3832 src_regs[i].file = PROGRAM_TEMPORARY; 3833 src_regs[i].index = src0.index; 3834 } 3835 else if (src_regs[i].file == PROGRAM_INPUT) 3836 prog->InputsRead |= (1 << src_regs[i].index); 3837 } 3838 3839 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3840 } 3841 3842 /* Make modifications to fragment program info. */ 3843 prog->Parameters = _mesa_combine_parameter_lists(params, 3844 original->prog->Parameters); 3845 prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); 3846 prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); 3847 _mesa_free_parameter_list(params); 3848 count_resources(v, prog); 3849 fp->glsl_to_tgsi = v; 3850} 3851 3852/** 3853 * Make fragment program for glBitmap: 3854 * Sample the texture and kill the fragment if the bit is 0. 3855 * This program will be combined with the user's fragment program. 3856 * 3857 * Based on make_bitmap_fragment_program in st_cb_bitmap.c. 3858 */ 3859extern "C" void 3860get_bitmap_visitor(struct st_fragment_program *fp, 3861 glsl_to_tgsi_visitor *original, int samplerIndex) 3862{ 3863 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3864 struct st_context *st = st_context(original->ctx); 3865 struct gl_program *prog = &fp->Base.Base; 3866 st_src_reg coord, src0; 3867 st_dst_reg dst0; 3868 glsl_to_tgsi_instruction *inst; 3869 3870 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ 3871 v->ctx = original->ctx; 3872 v->prog = prog; 3873 v->glsl_version = original->glsl_version; 3874 v->native_integers = original->native_integers; 3875 v->options = original->options; 3876 v->next_temp = original->next_temp; 3877 v->num_address_regs = original->num_address_regs; 3878 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3879 v->indirect_addr_temps = original->indirect_addr_temps; 3880 v->indirect_addr_consts = original->indirect_addr_consts; 3881 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3882 3883 /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ 3884 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3885 src0 = v->get_temp(glsl_type::vec4_type); 3886 dst0 = st_dst_reg(src0); 3887 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3888 inst->sampler = samplerIndex; 3889 inst->tex_target = TEXTURE_2D_INDEX; 3890 3891 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); 3892 prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ 3893 v->samplers_used |= (1 << samplerIndex); 3894 3895 /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ 3896 src0.negate = NEGATE_XYZW; 3897 if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) 3898 src0.swizzle = SWIZZLE_XXXX; 3899 inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); 3900 3901 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3902 * new visitor. */ 3903 foreach_iter(exec_list_iterator, iter, original->instructions) { 3904 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3905 st_src_reg src_regs[3]; 3906 3907 if (inst->dst.file == PROGRAM_OUTPUT) 3908 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3909 3910 for (int i=0; i<3; i++) { 3911 src_regs[i] = inst->src[i]; 3912 if (src_regs[i].file == PROGRAM_INPUT) 3913 prog->InputsRead |= (1 << src_regs[i].index); 3914 } 3915 3916 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3917 } 3918 3919 /* Make modifications to fragment program info. */ 3920 prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); 3921 prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); 3922 prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); 3923 count_resources(v, prog); 3924 fp->glsl_to_tgsi = v; 3925} 3926 3927/* ------------------------- TGSI conversion stuff -------------------------- */ 3928struct label { 3929 unsigned branch_target; 3930 unsigned token; 3931}; 3932 3933/** 3934 * Intermediate state used during shader translation. 3935 */ 3936struct st_translate { 3937 struct ureg_program *ureg; 3938 3939 struct ureg_dst temps[MAX_TEMPS]; 3940 struct ureg_src *constants; 3941 struct ureg_src *immediates; 3942 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 3943 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 3944 struct ureg_dst address[1]; 3945 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 3946 struct ureg_src systemValues[SYSTEM_VALUE_MAX]; 3947 3948 /* Extra info for handling point size clamping in vertex shader */ 3949 struct ureg_dst pointSizeResult; /**< Actual point size output register */ 3950 struct ureg_src pointSizeConst; /**< Point size range constant register */ 3951 GLint pointSizeOutIndex; /**< Temp point size output register */ 3952 GLboolean prevInstWrotePointSize; 3953 3954 const GLuint *inputMapping; 3955 const GLuint *outputMapping; 3956 3957 /* For every instruction that contains a label (eg CALL), keep 3958 * details so that we can go back afterwards and emit the correct 3959 * tgsi instruction number for each label. 3960 */ 3961 struct label *labels; 3962 unsigned labels_size; 3963 unsigned labels_count; 3964 3965 /* Keep a record of the tgsi instruction number that each mesa 3966 * instruction starts at, will be used to fix up labels after 3967 * translation. 3968 */ 3969 unsigned *insn; 3970 unsigned insn_size; 3971 unsigned insn_count; 3972 3973 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 3974 3975 boolean error; 3976}; 3977 3978/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ 3979static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { 3980 TGSI_SEMANTIC_FACE, 3981 TGSI_SEMANTIC_INSTANCEID 3982}; 3983 3984/** 3985 * Make note of a branch to a label in the TGSI code. 3986 * After we've emitted all instructions, we'll go over the list 3987 * of labels built here and patch the TGSI code with the actual 3988 * location of each label. 3989 */ 3990static unsigned *get_label(struct st_translate *t, unsigned branch_target) 3991{ 3992 unsigned i; 3993 3994 if (t->labels_count + 1 >= t->labels_size) { 3995 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 3996 t->labels = (struct label *)realloc(t->labels, 3997 t->labels_size * sizeof(struct label)); 3998 if (t->labels == NULL) { 3999 static unsigned dummy; 4000 t->error = TRUE; 4001 return &dummy; 4002 } 4003 } 4004 4005 i = t->labels_count++; 4006 t->labels[i].branch_target = branch_target; 4007 return &t->labels[i].token; 4008} 4009 4010/** 4011 * Called prior to emitting the TGSI code for each instruction. 4012 * Allocate additional space for instructions if needed. 4013 * Update the insn[] array so the next glsl_to_tgsi_instruction points to 4014 * the next TGSI instruction. 4015 */ 4016static void set_insn_start(struct st_translate *t, unsigned start) 4017{ 4018 if (t->insn_count + 1 >= t->insn_size) { 4019 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 4020 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); 4021 if (t->insn == NULL) { 4022 t->error = TRUE; 4023 return; 4024 } 4025 } 4026 4027 t->insn[t->insn_count++] = start; 4028} 4029 4030/** 4031 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 4032 */ 4033static struct ureg_src 4034emit_immediate(struct st_translate *t, 4035 gl_constant_value values[4], 4036 int type, int size) 4037{ 4038 struct ureg_program *ureg = t->ureg; 4039 4040 switch(type) 4041 { 4042 case GL_FLOAT: 4043 return ureg_DECL_immediate(ureg, &values[0].f, size); 4044 case GL_INT: 4045 return ureg_DECL_immediate_int(ureg, &values[0].i, size); 4046 case GL_UNSIGNED_INT: 4047 case GL_BOOL: 4048 return ureg_DECL_immediate_uint(ureg, &values[0].u, size); 4049 default: 4050 assert(!"should not get here - type must be float, int, uint, or bool"); 4051 return ureg_src_undef(); 4052 } 4053} 4054 4055/** 4056 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. 4057 */ 4058static struct ureg_dst 4059dst_register(struct st_translate *t, 4060 gl_register_file file, 4061 GLuint index) 4062{ 4063 switch(file) { 4064 case PROGRAM_UNDEFINED: 4065 return ureg_dst_undef(); 4066 4067 case PROGRAM_TEMPORARY: 4068 if (ureg_dst_is_undef(t->temps[index])) 4069 t->temps[index] = ureg_DECL_temporary(t->ureg); 4070 4071 return t->temps[index]; 4072 4073 case PROGRAM_OUTPUT: 4074 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) 4075 t->prevInstWrotePointSize = GL_TRUE; 4076 4077 if (t->procType == TGSI_PROCESSOR_VERTEX) 4078 assert(index < VERT_RESULT_MAX); 4079 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 4080 assert(index < FRAG_RESULT_MAX); 4081 else 4082 assert(index < GEOM_RESULT_MAX); 4083 4084 assert(t->outputMapping[index] < Elements(t->outputs)); 4085 4086 return t->outputs[t->outputMapping[index]]; 4087 4088 case PROGRAM_ADDRESS: 4089 return t->address[index]; 4090 4091 default: 4092 assert(!"unknown dst register file"); 4093 return ureg_dst_undef(); 4094 } 4095} 4096 4097/** 4098 * Map a glsl_to_tgsi src register to a TGSI ureg_src register. 4099 */ 4100static struct ureg_src 4101src_register(struct st_translate *t, 4102 gl_register_file file, 4103 GLuint index) 4104{ 4105 switch(file) { 4106 case PROGRAM_UNDEFINED: 4107 return ureg_src_undef(); 4108 4109 case PROGRAM_TEMPORARY: 4110 assert(index >= 0); 4111 assert(index < Elements(t->temps)); 4112 if (ureg_dst_is_undef(t->temps[index])) 4113 t->temps[index] = ureg_DECL_temporary(t->ureg); 4114 return ureg_src(t->temps[index]); 4115 4116 case PROGRAM_NAMED_PARAM: 4117 case PROGRAM_ENV_PARAM: 4118 case PROGRAM_LOCAL_PARAM: 4119 case PROGRAM_UNIFORM: 4120 assert(index >= 0); 4121 return t->constants[index]; 4122 case PROGRAM_STATE_VAR: 4123 case PROGRAM_CONSTANT: /* ie, immediate */ 4124 if (index < 0) 4125 return ureg_DECL_constant(t->ureg, 0); 4126 else 4127 return t->constants[index]; 4128 4129 case PROGRAM_IMMEDIATE: 4130 return t->immediates[index]; 4131 4132 case PROGRAM_INPUT: 4133 assert(t->inputMapping[index] < Elements(t->inputs)); 4134 return t->inputs[t->inputMapping[index]]; 4135 4136 case PROGRAM_OUTPUT: 4137 assert(t->outputMapping[index] < Elements(t->outputs)); 4138 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 4139 4140 case PROGRAM_ADDRESS: 4141 return ureg_src(t->address[index]); 4142 4143 case PROGRAM_SYSTEM_VALUE: 4144 assert(index < Elements(t->systemValues)); 4145 return t->systemValues[index]; 4146 4147 default: 4148 assert(!"unknown src register file"); 4149 return ureg_src_undef(); 4150 } 4151} 4152 4153/** 4154 * Create a TGSI ureg_dst register from an st_dst_reg. 4155 */ 4156static struct ureg_dst 4157translate_dst(struct st_translate *t, 4158 const st_dst_reg *dst_reg, 4159 bool saturate) 4160{ 4161 struct ureg_dst dst = dst_register(t, 4162 dst_reg->file, 4163 dst_reg->index); 4164 4165 dst = ureg_writemask(dst, dst_reg->writemask); 4166 4167 if (saturate) 4168 dst = ureg_saturate(dst); 4169 4170 if (dst_reg->reladdr != NULL) 4171 dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); 4172 4173 return dst; 4174} 4175 4176/** 4177 * Create a TGSI ureg_src register from an st_src_reg. 4178 */ 4179static struct ureg_src 4180translate_src(struct st_translate *t, const st_src_reg *src_reg) 4181{ 4182 struct ureg_src src = src_register(t, src_reg->file, src_reg->index); 4183 4184 src = ureg_swizzle(src, 4185 GET_SWZ(src_reg->swizzle, 0) & 0x3, 4186 GET_SWZ(src_reg->swizzle, 1) & 0x3, 4187 GET_SWZ(src_reg->swizzle, 2) & 0x3, 4188 GET_SWZ(src_reg->swizzle, 3) & 0x3); 4189 4190 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 4191 src = ureg_negate(src); 4192 4193 if (src_reg->reladdr != NULL) { 4194 /* Normally ureg_src_indirect() would be used here, but a stupid compiler 4195 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 4196 * set the bit for src.Negate. So we have to do the operation manually 4197 * here to work around the compiler's problems. */ 4198 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ 4199 struct ureg_src addr = ureg_src(t->address[0]); 4200 src.Indirect = 1; 4201 src.IndirectFile = addr.File; 4202 src.IndirectIndex = addr.Index; 4203 src.IndirectSwizzle = addr.SwizzleX; 4204 4205 if (src_reg->file != PROGRAM_INPUT && 4206 src_reg->file != PROGRAM_OUTPUT) { 4207 /* If src_reg->index was negative, it was set to zero in 4208 * src_register(). Reassign it now. But don't do this 4209 * for input/output regs since they get remapped while 4210 * const buffers don't. 4211 */ 4212 src.Index = src_reg->index; 4213 } 4214 } 4215 4216 return src; 4217} 4218 4219static void 4220compile_tgsi_instruction(struct st_translate *t, 4221 const glsl_to_tgsi_instruction *inst) 4222{ 4223 struct ureg_program *ureg = t->ureg; 4224 GLuint i; 4225 struct ureg_dst dst[1]; 4226 struct ureg_src src[4]; 4227 unsigned num_dst; 4228 unsigned num_src; 4229 4230 num_dst = num_inst_dst_regs(inst->op); 4231 num_src = num_inst_src_regs(inst->op); 4232 4233 if (num_dst) 4234 dst[0] = translate_dst(t, 4235 &inst->dst, 4236 inst->saturate); 4237 4238 for (i = 0; i < num_src; i++) 4239 src[i] = translate_src(t, &inst->src[i]); 4240 4241 switch(inst->op) { 4242 case TGSI_OPCODE_BGNLOOP: 4243 case TGSI_OPCODE_CAL: 4244 case TGSI_OPCODE_ELSE: 4245 case TGSI_OPCODE_ENDLOOP: 4246 case TGSI_OPCODE_IF: 4247 assert(num_dst == 0); 4248 ureg_label_insn(ureg, 4249 inst->op, 4250 src, num_src, 4251 get_label(t, 4252 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); 4253 return; 4254 4255 case TGSI_OPCODE_TEX: 4256 case TGSI_OPCODE_TXB: 4257 case TGSI_OPCODE_TXD: 4258 case TGSI_OPCODE_TXL: 4259 case TGSI_OPCODE_TXP: 4260 case TGSI_OPCODE_TXQ: 4261 case TGSI_OPCODE_TXF: 4262 src[num_src++] = t->samplers[inst->sampler]; 4263 ureg_tex_insn(ureg, 4264 inst->op, 4265 dst, num_dst, 4266 translate_texture_target(inst->tex_target, inst->tex_shadow), 4267 src, num_src); 4268 return; 4269 4270 case TGSI_OPCODE_SCS: 4271 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); 4272 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); 4273 break; 4274 4275 default: 4276 ureg_insn(ureg, 4277 inst->op, 4278 dst, num_dst, 4279 src, num_src); 4280 break; 4281 } 4282} 4283 4284/** 4285 * Emit the TGSI instructions to adjust the WPOS pixel center convention 4286 * Basically, add (adjX, adjY) to the fragment position. 4287 */ 4288static void 4289emit_adjusted_wpos(struct st_translate *t, 4290 const struct gl_program *program, 4291 float adjX, float adjY) 4292{ 4293 struct ureg_program *ureg = t->ureg; 4294 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 4295 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 4296 4297 /* Note that we bias X and Y and pass Z and W through unchanged. 4298 * The shader might also use gl_FragCoord.w and .z. 4299 */ 4300 ureg_ADD(ureg, wpos_temp, wpos_input, 4301 ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); 4302 4303 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 4304} 4305 4306 4307/** 4308 * Emit the TGSI instructions for inverting the WPOS y coordinate. 4309 * This code is unavoidable because it also depends on whether 4310 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 4311 */ 4312static void 4313emit_wpos_inversion(struct st_translate *t, 4314 const struct gl_program *program, 4315 bool invert) 4316{ 4317 struct ureg_program *ureg = t->ureg; 4318 4319 /* Fragment program uses fragment position input. 4320 * Need to replace instances of INPUT[WPOS] with temp T 4321 * where T = INPUT[WPOS] by y is inverted. 4322 */ 4323 static const gl_state_index wposTransformState[STATE_LENGTH] 4324 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 4325 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4326 4327 /* XXX: note we are modifying the incoming shader here! Need to 4328 * do this before emitting the constant decls below, or this 4329 * will be missed: 4330 */ 4331 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 4332 wposTransformState); 4333 4334 struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); 4335 struct ureg_dst wpos_temp; 4336 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 4337 4338 /* MOV wpos_temp, input[wpos] 4339 */ 4340 if (wpos_input.File == TGSI_FILE_TEMPORARY) 4341 wpos_temp = ureg_dst(wpos_input); 4342 else { 4343 wpos_temp = ureg_DECL_temporary(ureg); 4344 ureg_MOV(ureg, wpos_temp, wpos_input); 4345 } 4346 4347 if (invert) { 4348 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 4349 */ 4350 ureg_MAD(ureg, 4351 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), 4352 wpos_input, 4353 ureg_scalar(wpostrans, 0), 4354 ureg_scalar(wpostrans, 1)); 4355 } else { 4356 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 4357 */ 4358 ureg_MAD(ureg, 4359 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), 4360 wpos_input, 4361 ureg_scalar(wpostrans, 2), 4362 ureg_scalar(wpostrans, 3)); 4363 } 4364 4365 /* Use wpos_temp as position input from here on: 4366 */ 4367 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 4368} 4369 4370 4371/** 4372 * Emit fragment position/ooordinate code. 4373 */ 4374static void 4375emit_wpos(struct st_context *st, 4376 struct st_translate *t, 4377 const struct gl_program *program, 4378 struct ureg_program *ureg) 4379{ 4380 const struct gl_fragment_program *fp = 4381 (const struct gl_fragment_program *) program; 4382 struct pipe_screen *pscreen = st->pipe->screen; 4383 boolean invert = FALSE; 4384 4385 if (fp->OriginUpperLeft) { 4386 /* Fragment shader wants origin in upper-left */ 4387 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 4388 /* the driver supports upper-left origin */ 4389 } 4390 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 4391 /* the driver supports lower-left origin, need to invert Y */ 4392 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4393 invert = TRUE; 4394 } 4395 else 4396 assert(0); 4397 } 4398 else { 4399 /* Fragment shader wants origin in lower-left */ 4400 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 4401 /* the driver supports lower-left origin */ 4402 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4403 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 4404 /* the driver supports upper-left origin, need to invert Y */ 4405 invert = TRUE; 4406 else 4407 assert(0); 4408 } 4409 4410 if (fp->PixelCenterInteger) { 4411 /* Fragment shader wants pixel center integer */ 4412 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) 4413 /* the driver supports pixel center integer */ 4414 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4415 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) 4416 /* the driver supports pixel center half integer, need to bias X,Y */ 4417 emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f); 4418 else 4419 assert(0); 4420 } 4421 else { 4422 /* Fragment shader wants pixel center half integer */ 4423 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4424 /* the driver supports pixel center half integer */ 4425 } 4426 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4427 /* the driver supports pixel center integer, need to bias X,Y */ 4428 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4429 emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); 4430 } 4431 else 4432 assert(0); 4433 } 4434 4435 /* we invert after adjustment so that we avoid the MOV to temporary, 4436 * and reuse the adjustment ADD instead */ 4437 emit_wpos_inversion(t, program, invert); 4438} 4439 4440/** 4441 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 4442 * TGSI uses +1 for front, -1 for back. 4443 * This function converts the TGSI value to the GL value. Simply clamping/ 4444 * saturating the value to [0,1] does the job. 4445 */ 4446static void 4447emit_face_var(struct st_translate *t) 4448{ 4449 struct ureg_program *ureg = t->ureg; 4450 struct ureg_dst face_temp = ureg_DECL_temporary(ureg); 4451 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 4452 4453 /* MOV_SAT face_temp, input[face] */ 4454 face_temp = ureg_saturate(face_temp); 4455 ureg_MOV(ureg, face_temp, face_input); 4456 4457 /* Use face_temp as face input from here on: */ 4458 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 4459} 4460 4461static void 4462emit_edgeflags(struct st_translate *t) 4463{ 4464 struct ureg_program *ureg = t->ureg; 4465 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 4466 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 4467 4468 ureg_MOV(ureg, edge_dst, edge_src); 4469} 4470 4471/** 4472 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 4473 * \param program the program to translate 4474 * \param numInputs number of input registers used 4475 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 4476 * input indexes 4477 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 4478 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 4479 * each input 4480 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 4481 * \param numOutputs number of output registers used 4482 * \param outputMapping maps Mesa fragment program outputs to TGSI 4483 * generic outputs 4484 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 4485 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 4486 * each output 4487 * 4488 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 4489 */ 4490extern "C" enum pipe_error 4491st_translate_program( 4492 struct gl_context *ctx, 4493 uint procType, 4494 struct ureg_program *ureg, 4495 glsl_to_tgsi_visitor *program, 4496 const struct gl_program *proginfo, 4497 GLuint numInputs, 4498 const GLuint inputMapping[], 4499 const ubyte inputSemanticName[], 4500 const ubyte inputSemanticIndex[], 4501 const GLuint interpMode[], 4502 GLuint numOutputs, 4503 const GLuint outputMapping[], 4504 const ubyte outputSemanticName[], 4505 const ubyte outputSemanticIndex[], 4506 boolean passthrough_edgeflags) 4507{ 4508 struct st_translate translate, *t; 4509 unsigned i; 4510 enum pipe_error ret = PIPE_OK; 4511 4512 assert(numInputs <= Elements(t->inputs)); 4513 assert(numOutputs <= Elements(t->outputs)); 4514 4515 t = &translate; 4516 memset(t, 0, sizeof *t); 4517 4518 t->procType = procType; 4519 t->inputMapping = inputMapping; 4520 t->outputMapping = outputMapping; 4521 t->ureg = ureg; 4522 t->pointSizeOutIndex = -1; 4523 t->prevInstWrotePointSize = GL_FALSE; 4524 4525 /* 4526 * Declare input attributes. 4527 */ 4528 if (procType == TGSI_PROCESSOR_FRAGMENT) { 4529 for (i = 0; i < numInputs; i++) { 4530 t->inputs[i] = ureg_DECL_fs_input(ureg, 4531 inputSemanticName[i], 4532 inputSemanticIndex[i], 4533 interpMode[i]); 4534 } 4535 4536 if (proginfo->InputsRead & FRAG_BIT_WPOS) { 4537 /* Must do this after setting up t->inputs, and before 4538 * emitting constant references, below: 4539 */ 4540 emit_wpos(st_context(ctx), t, proginfo, ureg); 4541 } 4542 4543 if (proginfo->InputsRead & FRAG_BIT_FACE) 4544 emit_face_var(t); 4545 4546 /* 4547 * Declare output attributes. 4548 */ 4549 for (i = 0; i < numOutputs; i++) { 4550 switch (outputSemanticName[i]) { 4551 case TGSI_SEMANTIC_POSITION: 4552 t->outputs[i] = ureg_DECL_output(ureg, 4553 TGSI_SEMANTIC_POSITION, /* Z/Depth */ 4554 outputSemanticIndex[i]); 4555 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); 4556 break; 4557 case TGSI_SEMANTIC_STENCIL: 4558 t->outputs[i] = ureg_DECL_output(ureg, 4559 TGSI_SEMANTIC_STENCIL, /* Stencil */ 4560 outputSemanticIndex[i]); 4561 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); 4562 break; 4563 case TGSI_SEMANTIC_COLOR: 4564 t->outputs[i] = ureg_DECL_output(ureg, 4565 TGSI_SEMANTIC_COLOR, 4566 outputSemanticIndex[i]); 4567 break; 4568 default: 4569 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); 4570 return PIPE_ERROR_BAD_INPUT; 4571 } 4572 } 4573 } 4574 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 4575 for (i = 0; i < numInputs; i++) { 4576 t->inputs[i] = ureg_DECL_gs_input(ureg, 4577 i, 4578 inputSemanticName[i], 4579 inputSemanticIndex[i]); 4580 } 4581 4582 for (i = 0; i < numOutputs; i++) { 4583 t->outputs[i] = ureg_DECL_output(ureg, 4584 outputSemanticName[i], 4585 outputSemanticIndex[i]); 4586 } 4587 } 4588 else { 4589 assert(procType == TGSI_PROCESSOR_VERTEX); 4590 4591 for (i = 0; i < numInputs; i++) { 4592 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 4593 } 4594 4595 for (i = 0; i < numOutputs; i++) { 4596 t->outputs[i] = ureg_DECL_output(ureg, 4597 outputSemanticName[i], 4598 outputSemanticIndex[i]); 4599 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { 4600 /* Writing to the point size result register requires special 4601 * handling to implement clamping. 4602 */ 4603 static const gl_state_index pointSizeClampState[STATE_LENGTH] 4604 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4605 /* XXX: note we are modifying the incoming shader here! Need to 4606 * do this before emitting the constant decls below, or this 4607 * will be missed. 4608 */ 4609 unsigned pointSizeClampConst = 4610 _mesa_add_state_reference(proginfo->Parameters, 4611 pointSizeClampState); 4612 struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); 4613 t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); 4614 t->pointSizeResult = t->outputs[i]; 4615 t->pointSizeOutIndex = i; 4616 t->outputs[i] = psizregtemp; 4617 } 4618 } 4619 if (passthrough_edgeflags) 4620 emit_edgeflags(t); 4621 } 4622 4623 /* Declare address register. 4624 */ 4625 if (program->num_address_regs > 0) { 4626 assert(program->num_address_regs == 1); 4627 t->address[0] = ureg_DECL_address(ureg); 4628 } 4629 4630 /* Declare misc input registers 4631 */ 4632 { 4633 GLbitfield sysInputs = proginfo->SystemValuesRead; 4634 unsigned numSys = 0; 4635 for (i = 0; sysInputs; i++) { 4636 if (sysInputs & (1 << i)) { 4637 unsigned semName = mesa_sysval_to_semantic[i]; 4638 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); 4639 numSys++; 4640 sysInputs &= ~(1 << i); 4641 } 4642 } 4643 } 4644 4645 if (program->indirect_addr_temps) { 4646 /* If temps are accessed with indirect addressing, declare temporaries 4647 * in sequential order. Else, we declare them on demand elsewhere. 4648 * (Note: the number of temporaries is equal to program->next_temp) 4649 */ 4650 for (i = 0; i < (unsigned)program->next_temp; i++) { 4651 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 4652 t->temps[i] = ureg_DECL_temporary(t->ureg); 4653 } 4654 } 4655 4656 /* Emit constants and uniforms. TGSI uses a single index space for these, 4657 * so we put all the translated regs in t->constants. 4658 */ 4659 if (proginfo->Parameters) { 4660 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); 4661 if (t->constants == NULL) { 4662 ret = PIPE_ERROR_OUT_OF_MEMORY; 4663 goto out; 4664 } 4665 4666 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 4667 switch (proginfo->Parameters->Parameters[i].Type) { 4668 case PROGRAM_ENV_PARAM: 4669 case PROGRAM_LOCAL_PARAM: 4670 case PROGRAM_STATE_VAR: 4671 case PROGRAM_NAMED_PARAM: 4672 case PROGRAM_UNIFORM: 4673 t->constants[i] = ureg_DECL_constant(ureg, i); 4674 break; 4675 4676 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect 4677 * addressing of the const buffer. 4678 * FIXME: Be smarter and recognize param arrays: 4679 * indirect addressing is only valid within the referenced 4680 * array. 4681 */ 4682 case PROGRAM_CONSTANT: 4683 if (program->indirect_addr_consts) 4684 t->constants[i] = ureg_DECL_constant(ureg, i); 4685 else 4686 t->constants[i] = emit_immediate(t, 4687 proginfo->Parameters->ParameterValues[i], 4688 proginfo->Parameters->Parameters[i].DataType, 4689 4); 4690 break; 4691 default: 4692 break; 4693 } 4694 } 4695 } 4696 4697 /* Emit immediate values. 4698 */ 4699 t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); 4700 if (t->immediates == NULL) { 4701 ret = PIPE_ERROR_OUT_OF_MEMORY; 4702 goto out; 4703 } 4704 i = 0; 4705 foreach_iter(exec_list_iterator, iter, program->immediates) { 4706 immediate_storage *imm = (immediate_storage *)iter.get(); 4707 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); 4708 } 4709 4710 /* texture samplers */ 4711 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 4712 if (program->samplers_used & (1 << i)) { 4713 t->samplers[i] = ureg_DECL_sampler(ureg, i); 4714 } 4715 } 4716 4717 /* Emit each instruction in turn: 4718 */ 4719 foreach_iter(exec_list_iterator, iter, program->instructions) { 4720 set_insn_start(t, ureg_get_instruction_number(ureg)); 4721 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); 4722 4723 if (t->prevInstWrotePointSize && proginfo->Id) { 4724 /* The previous instruction wrote to the (fake) vertex point size 4725 * result register. Now we need to clamp that value to the min/max 4726 * point size range, putting the result into the real point size 4727 * register. 4728 * Note that we can't do this easily at the end of program due to 4729 * possible early return. 4730 */ 4731 set_insn_start(t, ureg_get_instruction_number(ureg)); 4732 ureg_MAX(t->ureg, 4733 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), 4734 ureg_src(t->outputs[t->pointSizeOutIndex]), 4735 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 4736 ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), 4737 ureg_src(t->outputs[t->pointSizeOutIndex]), 4738 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 4739 } 4740 t->prevInstWrotePointSize = GL_FALSE; 4741 } 4742 4743 /* Fix up all emitted labels: 4744 */ 4745 for (i = 0; i < t->labels_count; i++) { 4746 ureg_fixup_label(ureg, t->labels[i].token, 4747 t->insn[t->labels[i].branch_target]); 4748 } 4749 4750out: 4751 FREE(t->insn); 4752 FREE(t->labels); 4753 FREE(t->constants); 4754 FREE(t->immediates); 4755 4756 if (t->error) { 4757 debug_printf("%s: translate error flag set\n", __FUNCTION__); 4758 } 4759 4760 return ret; 4761} 4762/* ----------------------------- End TGSI code ------------------------------ */ 4763 4764/** 4765 * Convert a shader's GLSL IR into a Mesa gl_program, although without 4766 * generating Mesa IR. 4767 */ 4768static struct gl_program * 4769get_mesa_program(struct gl_context *ctx, 4770 struct gl_shader_program *shader_program, 4771 struct gl_shader *shader) 4772{ 4773 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); 4774 struct gl_program *prog; 4775 GLenum target; 4776 const char *target_string; 4777 bool progress; 4778 struct gl_shader_compiler_options *options = 4779 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; 4780 4781 switch (shader->Type) { 4782 case GL_VERTEX_SHADER: 4783 target = GL_VERTEX_PROGRAM_ARB; 4784 target_string = "vertex"; 4785 break; 4786 case GL_FRAGMENT_SHADER: 4787 target = GL_FRAGMENT_PROGRAM_ARB; 4788 target_string = "fragment"; 4789 break; 4790 case GL_GEOMETRY_SHADER: 4791 target = GL_GEOMETRY_PROGRAM_NV; 4792 target_string = "geometry"; 4793 break; 4794 default: 4795 assert(!"should not be reached"); 4796 return NULL; 4797 } 4798 4799 validate_ir_tree(shader->ir); 4800 4801 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); 4802 if (!prog) 4803 return NULL; 4804 prog->Parameters = _mesa_new_parameter_list(); 4805 prog->Varying = _mesa_new_parameter_list(); 4806 prog->Attributes = _mesa_new_parameter_list(); 4807 v->ctx = ctx; 4808 v->prog = prog; 4809 v->shader_program = shader_program; 4810 v->options = options; 4811 v->glsl_version = ctx->Const.GLSLVersion; 4812 v->native_integers = ctx->Const.NativeIntegers; 4813 4814 add_uniforms_to_parameters_list(shader_program, shader, prog); 4815 4816 /* Emit intermediate IR for main(). */ 4817 visit_exec_list(shader->ir, v); 4818 4819 /* Now emit bodies for any functions that were used. */ 4820 do { 4821 progress = GL_FALSE; 4822 4823 foreach_iter(exec_list_iterator, iter, v->function_signatures) { 4824 function_entry *entry = (function_entry *)iter.get(); 4825 4826 if (!entry->bgn_inst) { 4827 v->current_function = entry; 4828 4829 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); 4830 entry->bgn_inst->function = entry; 4831 4832 visit_exec_list(&entry->sig->body, v); 4833 4834 glsl_to_tgsi_instruction *last; 4835 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); 4836 if (last->op != TGSI_OPCODE_RET) 4837 v->emit(NULL, TGSI_OPCODE_RET); 4838 4839 glsl_to_tgsi_instruction *end; 4840 end = v->emit(NULL, TGSI_OPCODE_ENDSUB); 4841 end->function = entry; 4842 4843 progress = GL_TRUE; 4844 } 4845 } 4846 } while (progress); 4847 4848#if 0 4849 /* Print out some information (for debugging purposes) used by the 4850 * optimization passes. */ 4851 for (i=0; i < v->next_temp; i++) { 4852 int fr = v->get_first_temp_read(i); 4853 int fw = v->get_first_temp_write(i); 4854 int lr = v->get_last_temp_read(i); 4855 int lw = v->get_last_temp_write(i); 4856 4857 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); 4858 assert(fw <= fr); 4859 } 4860#endif 4861 4862 /* Remove reads to output registers, and to varyings in vertex shaders. */ 4863 v->remove_output_reads(PROGRAM_OUTPUT); 4864 if (target == GL_VERTEX_PROGRAM_ARB) 4865 v->remove_output_reads(PROGRAM_VARYING); 4866 4867 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ 4868 v->simplify_cmp(); 4869 v->copy_propagate(); 4870 while (v->eliminate_dead_code_advanced()); 4871 4872 /* FIXME: These passes to optimize temporary registers don't work when there 4873 * is indirect addressing of the temporary register space. We need proper 4874 * array support so that we don't have to give up these passes in every 4875 * shader that uses arrays. 4876 */ 4877 if (!v->indirect_addr_temps) { 4878 v->eliminate_dead_code(); 4879 v->merge_registers(); 4880 v->renumber_registers(); 4881 } 4882 4883 /* Write the END instruction. */ 4884 v->emit(NULL, TGSI_OPCODE_END); 4885 4886 if (ctx->Shader.Flags & GLSL_DUMP) { 4887 printf("\n"); 4888 printf("GLSL IR for linked %s program %d:\n", target_string, 4889 shader_program->Name); 4890 _mesa_print_ir(shader->ir, NULL); 4891 printf("\n"); 4892 printf("\n"); 4893 } 4894 4895 prog->Instructions = NULL; 4896 prog->NumInstructions = 0; 4897 4898 do_set_program_inouts(shader->ir, prog); 4899 count_resources(v, prog); 4900 4901 check_resources(ctx, shader_program, v, prog); 4902 4903 _mesa_reference_program(ctx, &shader->Program, prog); 4904 4905 struct st_vertex_program *stvp; 4906 struct st_fragment_program *stfp; 4907 struct st_geometry_program *stgp; 4908 4909 switch (shader->Type) { 4910 case GL_VERTEX_SHADER: 4911 stvp = (struct st_vertex_program *)prog; 4912 stvp->glsl_to_tgsi = v; 4913 break; 4914 case GL_FRAGMENT_SHADER: 4915 stfp = (struct st_fragment_program *)prog; 4916 stfp->glsl_to_tgsi = v; 4917 break; 4918 case GL_GEOMETRY_SHADER: 4919 stgp = (struct st_geometry_program *)prog; 4920 stgp->glsl_to_tgsi = v; 4921 break; 4922 default: 4923 assert(!"should not be reached"); 4924 return NULL; 4925 } 4926 4927 return prog; 4928} 4929 4930extern "C" { 4931 4932struct gl_shader * 4933st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 4934{ 4935 struct gl_shader *shader; 4936 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || 4937 type == GL_GEOMETRY_SHADER_ARB); 4938 shader = rzalloc(NULL, struct gl_shader); 4939 if (shader) { 4940 shader->Type = type; 4941 shader->Name = name; 4942 _mesa_init_shader(ctx, shader); 4943 } 4944 return shader; 4945} 4946 4947struct gl_shader_program * 4948st_new_shader_program(struct gl_context *ctx, GLuint name) 4949{ 4950 struct gl_shader_program *shProg; 4951 shProg = rzalloc(NULL, struct gl_shader_program); 4952 if (shProg) { 4953 shProg->Name = name; 4954 _mesa_init_shader_program(ctx, shProg); 4955 } 4956 return shProg; 4957} 4958 4959/** 4960 * Link a shader. 4961 * Called via ctx->Driver.LinkShader() 4962 * This actually involves converting GLSL IR into an intermediate TGSI-like IR 4963 * with code lowering and other optimizations. 4964 */ 4965GLboolean 4966st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 4967{ 4968 assert(prog->LinkStatus); 4969 4970 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4971 if (prog->_LinkedShaders[i] == NULL) 4972 continue; 4973 4974 bool progress; 4975 exec_list *ir = prog->_LinkedShaders[i]->ir; 4976 const struct gl_shader_compiler_options *options = 4977 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; 4978 4979 do { 4980 progress = false; 4981 4982 /* Lowering */ 4983 do_mat_op_to_vec(ir); 4984 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 4985 | LOG_TO_LOG2 4986 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); 4987 4988 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; 4989 4990 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; 4991 4992 progress = lower_quadop_vector(ir, false) || progress; 4993 4994 if (options->EmitNoIfs) { 4995 progress = lower_discard(ir) || progress; 4996 progress = lower_if_to_cond_assign(ir) || progress; 4997 } 4998 4999 if (options->EmitNoNoise) 5000 progress = lower_noise(ir) || progress; 5001 5002 /* If there are forms of indirect addressing that the driver 5003 * cannot handle, perform the lowering pass. 5004 */ 5005 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 5006 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 5007 progress = 5008 lower_variable_index_to_cond_assign(ir, 5009 options->EmitNoIndirectInput, 5010 options->EmitNoIndirectOutput, 5011 options->EmitNoIndirectTemp, 5012 options->EmitNoIndirectUniform) 5013 || progress; 5014 5015 progress = do_vec_index_to_cond_assign(ir) || progress; 5016 } while (progress); 5017 5018 validate_ir_tree(ir); 5019 } 5020 5021 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 5022 struct gl_program *linked_prog; 5023 5024 if (prog->_LinkedShaders[i] == NULL) 5025 continue; 5026 5027 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); 5028 5029 if (linked_prog) { 5030 bool ok = true; 5031 5032 switch (prog->_LinkedShaders[i]->Type) { 5033 case GL_VERTEX_SHADER: 5034 _mesa_reference_vertprog(ctx, &prog->VertexProgram, 5035 (struct gl_vertex_program *)linked_prog); 5036 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, 5037 linked_prog); 5038 break; 5039 case GL_FRAGMENT_SHADER: 5040 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, 5041 (struct gl_fragment_program *)linked_prog); 5042 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, 5043 linked_prog); 5044 break; 5045 case GL_GEOMETRY_SHADER: 5046 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, 5047 (struct gl_geometry_program *)linked_prog); 5048 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, 5049 linked_prog); 5050 break; 5051 } 5052 if (!ok) { 5053 return GL_FALSE; 5054 } 5055 } 5056 5057 _mesa_reference_program(ctx, &linked_prog, NULL); 5058 } 5059 5060 return GL_TRUE; 5061} 5062 5063} /* extern "C" */ 5064