st_glsl_to_tgsi.cpp revision 4683529048ee133481b2d8f1cae1685aa1736f9a
/*
 * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
 * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
 * Copyright © 2010 Intel Corporation
 * Copyright © 2011 Bryan Cain
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file st_glsl_to_tgsi.cpp
 *
 * Translate GLSL IR to TGSI.
31 */ 32 33#include <stdio.h> 34#include "main/compiler.h" 35#include "ir.h" 36#include "ir_visitor.h" 37#include "ir_print_visitor.h" 38#include "ir_expression_flattening.h" 39#include "glsl_types.h" 40#include "glsl_parser_extras.h" 41#include "../glsl/program.h" 42#include "ir_optimization.h" 43#include "ast.h" 44 45extern "C" { 46#include "main/mtypes.h" 47#include "main/shaderapi.h" 48#include "main/shaderobj.h" 49#include "main/uniforms.h" 50#include "program/hash_table.h" 51#include "program/prog_instruction.h" 52#include "program/prog_optimize.h" 53#include "program/prog_print.h" 54#include "program/program.h" 55#include "program/prog_uniform.h" 56#include "program/prog_parameter.h" 57#include "program/sampler.h" 58 59#include "pipe/p_compiler.h" 60#include "pipe/p_context.h" 61#include "pipe/p_screen.h" 62#include "pipe/p_shader_tokens.h" 63#include "pipe/p_state.h" 64#include "util/u_math.h" 65#include "tgsi/tgsi_ureg.h" 66#include "tgsi/tgsi_info.h" 67#include "st_context.h" 68#include "st_program.h" 69#include "st_glsl_to_tgsi.h" 70#include "st_mesa_to_tgsi.h" 71} 72 73#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX 74#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 75 (1 << PROGRAM_ENV_PARAM) | \ 76 (1 << PROGRAM_STATE_VAR) | \ 77 (1 << PROGRAM_NAMED_PARAM) | \ 78 (1 << PROGRAM_CONSTANT) | \ 79 (1 << PROGRAM_UNIFORM)) 80 81#define MAX_TEMPS 4096 82 83class st_src_reg; 84class st_dst_reg; 85 86static int swizzle_for_size(int size); 87 88/** 89 * This struct is a corresponding struct to TGSI ureg_src. 90 */ 91class st_src_reg { 92public: 93 st_src_reg(gl_register_file file, int index, const glsl_type *type) 94 { 95 this->file = file; 96 this->index = index; 97 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 98 this->swizzle = swizzle_for_size(type->vector_elements); 99 else 100 this->swizzle = SWIZZLE_XYZW; 101 this->negate = 0; 102 this->type = type ? 
type->base_type : GLSL_TYPE_ERROR; 103 this->reladdr = NULL; 104 } 105 106 st_src_reg(gl_register_file file, int index, int type) 107 { 108 this->type = type; 109 this->file = file; 110 this->index = index; 111 this->swizzle = SWIZZLE_XYZW; 112 this->negate = 0; 113 this->reladdr = NULL; 114 } 115 116 st_src_reg() 117 { 118 this->type = GLSL_TYPE_ERROR; 119 this->file = PROGRAM_UNDEFINED; 120 this->index = 0; 121 this->swizzle = 0; 122 this->negate = 0; 123 this->reladdr = NULL; 124 } 125 126 explicit st_src_reg(st_dst_reg reg); 127 128 gl_register_file file; /**< PROGRAM_* from Mesa */ 129 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 130 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 131 int negate; /**< NEGATE_XYZW mask from mesa */ 132 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 133 /** Register index should be offset by the integer in this reg. */ 134 st_src_reg *reladdr; 135}; 136 137class st_dst_reg { 138public: 139 st_dst_reg(gl_register_file file, int writemask, int type) 140 { 141 this->file = file; 142 this->index = 0; 143 this->writemask = writemask; 144 this->cond_mask = COND_TR; 145 this->reladdr = NULL; 146 this->type = type; 147 } 148 149 st_dst_reg() 150 { 151 this->type = GLSL_TYPE_ERROR; 152 this->file = PROGRAM_UNDEFINED; 153 this->index = 0; 154 this->writemask = 0; 155 this->cond_mask = COND_TR; 156 this->reladdr = NULL; 157 } 158 159 explicit st_dst_reg(st_src_reg reg); 160 161 gl_register_file file; /**< PROGRAM_* from Mesa */ 162 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 163 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 164 GLuint cond_mask:4; 165 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 166 /** Register index should be offset by the integer in this reg. 
*/ 167 st_src_reg *reladdr; 168}; 169 170st_src_reg::st_src_reg(st_dst_reg reg) 171{ 172 this->type = reg.type; 173 this->file = reg.file; 174 this->index = reg.index; 175 this->swizzle = SWIZZLE_XYZW; 176 this->negate = 0; 177 this->reladdr = reg.reladdr; 178} 179 180st_dst_reg::st_dst_reg(st_src_reg reg) 181{ 182 this->type = reg.type; 183 this->file = reg.file; 184 this->index = reg.index; 185 this->writemask = WRITEMASK_XYZW; 186 this->cond_mask = COND_TR; 187 this->reladdr = reg.reladdr; 188} 189 190class glsl_to_tgsi_instruction : public exec_node { 191public: 192 /* Callers of this ralloc-based new need not call delete. It's 193 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 194 static void* operator new(size_t size, void *ctx) 195 { 196 void *node; 197 198 node = rzalloc_size(ctx, size); 199 assert(node != NULL); 200 201 return node; 202 } 203 204 unsigned op; 205 st_dst_reg dst; 206 st_src_reg src[3]; 207 /** Pointer to the ir source this tree came from for debugging */ 208 ir_instruction *ir; 209 GLboolean cond_update; 210 bool saturate; 211 int sampler; /**< sampler index */ 212 int tex_target; /**< One of TEXTURE_*_INDEX */ 213 GLboolean tex_shadow; 214 int dead_mask; /**< Used in dead code elimination */ 215 216 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ 217}; 218 219class variable_storage : public exec_node { 220public: 221 variable_storage(ir_variable *var, gl_register_file file, int index) 222 : file(file), index(index), var(var) 223 { 224 /* empty */ 225 } 226 227 gl_register_file file; 228 int index; 229 ir_variable *var; /* variable that maps to this, if any */ 230}; 231 232class immediate_storage : public exec_node { 233public: 234 immediate_storage(gl_constant_value *values, int size, int type) 235 { 236 memcpy(this->values, values, size * sizeof(gl_constant_value)); 237 this->size = size; 238 this->type = type; 239 } 240 241 gl_constant_value values[4]; 242 int size; /**< Number of 
components (1-4) */ 243 int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ 244}; 245 246class function_entry : public exec_node { 247public: 248 ir_function_signature *sig; 249 250 /** 251 * identifier of this function signature used by the program. 252 * 253 * At the point that TGSI instructions for function calls are 254 * generated, we don't know the address of the first instruction of 255 * the function body. So we make the BranchTarget that is called a 256 * small integer and rewrite them during set_branchtargets(). 257 */ 258 int sig_id; 259 260 /** 261 * Pointer to first instruction of the function body. 262 * 263 * Set during function body emits after main() is processed. 264 */ 265 glsl_to_tgsi_instruction *bgn_inst; 266 267 /** 268 * Index of the first instruction of the function body in actual TGSI. 269 * 270 * Set after conversion from glsl_to_tgsi_instruction to TGSI. 271 */ 272 int inst; 273 274 /** Storage for the return value. */ 275 st_src_reg return_reg; 276}; 277 278class glsl_to_tgsi_visitor : public ir_visitor { 279public: 280 glsl_to_tgsi_visitor(); 281 ~glsl_to_tgsi_visitor(); 282 283 function_entry *current_function; 284 285 struct gl_context *ctx; 286 struct gl_program *prog; 287 struct gl_shader_program *shader_program; 288 struct gl_shader_compiler_options *options; 289 290 int next_temp; 291 292 int num_address_regs; 293 int samplers_used; 294 bool indirect_addr_temps; 295 bool indirect_addr_consts; 296 297 int glsl_version; 298 299 variable_storage *find_variable_storage(ir_variable *var); 300 301 int add_constant(gl_register_file file, gl_constant_value values[4], 302 int size, int datatype, GLuint *swizzle_out); 303 304 function_entry *get_function_signature(ir_function_signature *sig); 305 306 st_src_reg get_temp(const glsl_type *type); 307 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); 308 309 st_src_reg st_src_reg_for_float(float val); 310 st_src_reg st_src_reg_for_int(int val); 311 
st_src_reg st_src_reg_for_type(int type, int val); 312 313 /** 314 * \name Visit methods 315 * 316 * As typical for the visitor pattern, there must be one \c visit method for 317 * each concrete subclass of \c ir_instruction. Virtual base classes within 318 * the hierarchy should not have \c visit methods. 319 */ 320 /*@{*/ 321 virtual void visit(ir_variable *); 322 virtual void visit(ir_loop *); 323 virtual void visit(ir_loop_jump *); 324 virtual void visit(ir_function_signature *); 325 virtual void visit(ir_function *); 326 virtual void visit(ir_expression *); 327 virtual void visit(ir_swizzle *); 328 virtual void visit(ir_dereference_variable *); 329 virtual void visit(ir_dereference_array *); 330 virtual void visit(ir_dereference_record *); 331 virtual void visit(ir_assignment *); 332 virtual void visit(ir_constant *); 333 virtual void visit(ir_call *); 334 virtual void visit(ir_return *); 335 virtual void visit(ir_discard *); 336 virtual void visit(ir_texture *); 337 virtual void visit(ir_if *); 338 /*@}*/ 339 340 st_src_reg result; 341 342 /** List of variable_storage */ 343 exec_list variables; 344 345 /** List of immediate_storage */ 346 exec_list immediates; 347 int num_immediates; 348 349 /** List of function_entry */ 350 exec_list function_signatures; 351 int next_signature_id; 352 353 /** List of glsl_to_tgsi_instruction */ 354 exec_list instructions; 355 356 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); 357 358 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 359 st_dst_reg dst, st_src_reg src0); 360 361 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 362 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 363 364 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 365 st_dst_reg dst, 366 st_src_reg src0, st_src_reg src1, st_src_reg src2); 367 368 unsigned get_opcode(ir_instruction *ir, unsigned op, 369 st_dst_reg dst, 370 st_src_reg src0, st_src_reg src1); 371 372 /** 373 * Emit the correct 
dot-product instruction for the type of arguments 374 */ 375 void emit_dp(ir_instruction *ir, 376 st_dst_reg dst, 377 st_src_reg src0, 378 st_src_reg src1, 379 unsigned elements); 380 381 void emit_scalar(ir_instruction *ir, unsigned op, 382 st_dst_reg dst, st_src_reg src0); 383 384 void emit_scalar(ir_instruction *ir, unsigned op, 385 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 386 387 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); 388 389 void emit_scs(ir_instruction *ir, unsigned op, 390 st_dst_reg dst, const st_src_reg &src); 391 392 GLboolean try_emit_mad(ir_expression *ir, 393 int mul_operand); 394 GLboolean try_emit_sat(ir_expression *ir); 395 396 void emit_swz(ir_expression *ir); 397 398 bool process_move_condition(ir_rvalue *ir); 399 400 void remove_output_reads(gl_register_file type); 401 void simplify_cmp(void); 402 403 void rename_temp_register(int index, int new_index); 404 int get_first_temp_read(int index); 405 int get_first_temp_write(int index); 406 int get_last_temp_read(int index); 407 int get_last_temp_write(int index); 408 409 void copy_propagate(void); 410 void eliminate_dead_code(void); 411 int eliminate_dead_code_advanced(void); 412 void merge_registers(void); 413 void renumber_registers(void); 414 415 void *mem_ctx; 416}; 417 418static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); 419 420static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); 421 422static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); 423 424static void 425fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); 426 427static void 428fail_link(struct gl_shader_program *prog, const char *fmt, ...) 
429{ 430 va_list args; 431 va_start(args, fmt); 432 ralloc_vasprintf_append(&prog->InfoLog, fmt, args); 433 va_end(args); 434 435 prog->LinkStatus = GL_FALSE; 436} 437 438static int 439swizzle_for_size(int size) 440{ 441 int size_swizzles[4] = { 442 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 443 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 444 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 445 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 446 }; 447 448 assert((size >= 1) && (size <= 4)); 449 return size_swizzles[size - 1]; 450} 451 452static bool 453is_tex_instruction(unsigned opcode) 454{ 455 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 456 return info->is_tex; 457} 458 459static unsigned 460num_inst_dst_regs(unsigned opcode) 461{ 462 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 463 return info->num_dst; 464} 465 466static unsigned 467num_inst_src_regs(unsigned opcode) 468{ 469 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 470 return info->is_tex ? info->num_src - 1 : info->num_src; 471} 472 473glsl_to_tgsi_instruction * 474glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 475 st_dst_reg dst, 476 st_src_reg src0, st_src_reg src1, st_src_reg src2) 477{ 478 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); 479 int num_reladdr = 0, i; 480 481 op = get_opcode(ir, op, dst, src0, src1); 482 483 /* If we have to do relative addressing, we want to load the ARL 484 * reg directly for one of the regs, and preload the other reladdr 485 * sources into temps. 
486 */ 487 num_reladdr += dst.reladdr != NULL; 488 num_reladdr += src0.reladdr != NULL; 489 num_reladdr += src1.reladdr != NULL; 490 num_reladdr += src2.reladdr != NULL; 491 492 reladdr_to_temp(ir, &src2, &num_reladdr); 493 reladdr_to_temp(ir, &src1, &num_reladdr); 494 reladdr_to_temp(ir, &src0, &num_reladdr); 495 496 if (dst.reladdr) { 497 emit_arl(ir, address_reg, *dst.reladdr); 498 num_reladdr--; 499 } 500 assert(num_reladdr == 0); 501 502 inst->op = op; 503 inst->dst = dst; 504 inst->src[0] = src0; 505 inst->src[1] = src1; 506 inst->src[2] = src2; 507 inst->ir = ir; 508 inst->dead_mask = 0; 509 510 inst->function = NULL; 511 512 if (op == TGSI_OPCODE_ARL) 513 this->num_address_regs = 1; 514 515 /* Update indirect addressing status used by TGSI */ 516 if (dst.reladdr) { 517 switch(dst.file) { 518 case PROGRAM_TEMPORARY: 519 this->indirect_addr_temps = true; 520 break; 521 case PROGRAM_LOCAL_PARAM: 522 case PROGRAM_ENV_PARAM: 523 case PROGRAM_STATE_VAR: 524 case PROGRAM_NAMED_PARAM: 525 case PROGRAM_CONSTANT: 526 case PROGRAM_UNIFORM: 527 this->indirect_addr_consts = true; 528 break; 529 case PROGRAM_IMMEDIATE: 530 assert(!"immediates should not have indirect addressing"); 531 break; 532 default: 533 break; 534 } 535 } 536 else { 537 for (i=0; i<3; i++) { 538 if(inst->src[i].reladdr) { 539 switch(inst->src[i].file) { 540 case PROGRAM_TEMPORARY: 541 this->indirect_addr_temps = true; 542 break; 543 case PROGRAM_LOCAL_PARAM: 544 case PROGRAM_ENV_PARAM: 545 case PROGRAM_STATE_VAR: 546 case PROGRAM_NAMED_PARAM: 547 case PROGRAM_CONSTANT: 548 case PROGRAM_UNIFORM: 549 this->indirect_addr_consts = true; 550 break; 551 case PROGRAM_IMMEDIATE: 552 assert(!"immediates should not have indirect addressing"); 553 break; 554 default: 555 break; 556 } 557 } 558 } 559 } 560 561 this->instructions.push_tail(inst); 562 563 return inst; 564} 565 566 567glsl_to_tgsi_instruction * 568glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 569 st_dst_reg dst, st_src_reg src0, 
st_src_reg src1) 570{ 571 return emit(ir, op, dst, src0, src1, undef_src); 572} 573 574glsl_to_tgsi_instruction * 575glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 576 st_dst_reg dst, st_src_reg src0) 577{ 578 assert(dst.writemask != 0); 579 return emit(ir, op, dst, src0, undef_src, undef_src); 580} 581 582glsl_to_tgsi_instruction * 583glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) 584{ 585 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 586} 587 588/** 589 * Determines whether to use an integer, unsigned integer, or float opcode 590 * based on the operands and input opcode, then emits the result. 591 * 592 * TODO: type checking for remaining TGSI opcodes 593 */ 594unsigned 595glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, 596 st_dst_reg dst, 597 st_src_reg src0, st_src_reg src1) 598{ 599 int type = GLSL_TYPE_FLOAT; 600 601 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) 602 type = GLSL_TYPE_FLOAT; 603 else if (glsl_version >= 130) 604 type = src0.type; 605 606#define case4(c, f, i, u) \ 607 case TGSI_OPCODE_##c: \ 608 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ 609 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ 610 else op = TGSI_OPCODE_##f; \ 611 break; 612#define case3(f, i, u) case4(f, f, i, u) 613#define case2fi(f, i) case4(f, f, i, i) 614#define case2iu(i, u) case4(i, LAST, i, u) 615 616 switch(op) { 617 case2fi(ADD, UADD); 618 case2fi(MUL, UMUL); 619 case2fi(MAD, UMAD); 620 case3(DIV, IDIV, UDIV); 621 case3(MAX, IMAX, UMAX); 622 case3(MIN, IMIN, UMIN); 623 case2iu(MOD, UMOD); 624 625 case2fi(SEQ, USEQ); 626 case2fi(SNE, USNE); 627 case3(SGE, ISGE, USGE); 628 case3(SLT, ISLT, USLT); 629 630 case2iu(SHL, SHL); 631 case2iu(ISHR, USHR); 632 case2iu(NOT, NOT); 633 case2iu(AND, AND); 634 case2iu(OR, OR); 635 case2iu(XOR, XOR); 636 637 default: break; 638 } 639 640 assert(op != TGSI_OPCODE_LAST); 641 return op; 642} 643 644void 
645glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, 646 st_dst_reg dst, st_src_reg src0, st_src_reg src1, 647 unsigned elements) 648{ 649 static const unsigned dot_opcodes[] = { 650 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 651 }; 652 653 emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 654} 655 656/** 657 * Emits TGSI scalar opcodes to produce unique answers across channels. 658 * 659 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X 660 * channel determines the result across all channels. So to do a vec4 661 * of this operation, we want to emit a scalar per source channel used 662 * to produce dest channels. 663 */ 664void 665glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 666 st_dst_reg dst, 667 st_src_reg orig_src0, st_src_reg orig_src1) 668{ 669 int i, j; 670 int done_mask = ~dst.writemask; 671 672 /* TGSI RCP is a scalar operation splatting results to all channels, 673 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 674 * dst channels. 675 */ 676 for (i = 0; i < 4; i++) { 677 GLuint this_mask = (1 << i); 678 glsl_to_tgsi_instruction *inst; 679 st_src_reg src0 = orig_src0; 680 st_src_reg src1 = orig_src1; 681 682 if (done_mask & this_mask) 683 continue; 684 685 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 686 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 687 for (j = i + 1; j < 4; j++) { 688 /* If there is another enabled component in the destination that is 689 * derived from the same inputs, generate its value on this pass as 690 * well. 
691 */ 692 if (!(done_mask & (1 << j)) && 693 GET_SWZ(src0.swizzle, j) == src0_swiz && 694 GET_SWZ(src1.swizzle, j) == src1_swiz) { 695 this_mask |= (1 << j); 696 } 697 } 698 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 699 src0_swiz, src0_swiz); 700 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 701 src1_swiz, src1_swiz); 702 703 inst = emit(ir, op, dst, src0, src1); 704 inst->dst.writemask = this_mask; 705 done_mask |= this_mask; 706 } 707} 708 709void 710glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 711 st_dst_reg dst, st_src_reg src0) 712{ 713 st_src_reg undef = undef_src; 714 715 undef.swizzle = SWIZZLE_XXXX; 716 717 emit_scalar(ir, op, dst, src0, undef); 718} 719 720void 721glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 722 st_dst_reg dst, st_src_reg src0) 723{ 724 st_src_reg tmp = get_temp(glsl_type::float_type); 725 726 if (src0.type == GLSL_TYPE_INT) 727 emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); 728 else if (src0.type == GLSL_TYPE_UINT) 729 emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); 730 else 731 tmp = src0; 732 733 emit(NULL, TGSI_OPCODE_ARL, dst, tmp); 734} 735 736/** 737 * Emit an TGSI_OPCODE_SCS instruction 738 * 739 * The \c SCS opcode functions a bit differently than the other TGSI opcodes. 740 * Instead of splatting its result across all four components of the 741 * destination, it writes one value to the \c x component and another value to 742 * the \c y component. 743 * 744 * \param ir IR instruction being processed 745 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 746 * on which value is desired. 747 * \param dst Destination register 748 * \param src Source register 749 */ 750void 751glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, 752 st_dst_reg dst, 753 const st_src_reg &src) 754{ 755 /* Vertex programs cannot use the SCS opcode. 
756 */ 757 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { 758 emit_scalar(ir, op, dst, src); 759 return; 760 } 761 762 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; 763 const unsigned scs_mask = (1U << component); 764 int done_mask = ~dst.writemask; 765 st_src_reg tmp; 766 767 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); 768 769 /* If there are compnents in the destination that differ from the component 770 * that will be written by the SCS instrution, we'll need a temporary. 771 */ 772 if (scs_mask != unsigned(dst.writemask)) { 773 tmp = get_temp(glsl_type::vec4_type); 774 } 775 776 for (unsigned i = 0; i < 4; i++) { 777 unsigned this_mask = (1U << i); 778 st_src_reg src0 = src; 779 780 if ((done_mask & this_mask) != 0) 781 continue; 782 783 /* The source swizzle specified which component of the source generates 784 * sine / cosine for the current component in the destination. The SCS 785 * instruction requires that this value be swizzle to the X component. 786 * Replace the current swizzle with a swizzle that puts the source in 787 * the X component. 788 */ 789 unsigned src0_swiz = GET_SWZ(src.swizzle, i); 790 791 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 792 src0_swiz, src0_swiz); 793 for (unsigned j = i + 1; j < 4; j++) { 794 /* If there is another enabled component in the destination that is 795 * derived from the same inputs, generate its value on this pass as 796 * well. 797 */ 798 if (!(done_mask & (1 << j)) && 799 GET_SWZ(src0.swizzle, j) == src0_swiz) { 800 this_mask |= (1 << j); 801 } 802 } 803 804 if (this_mask != scs_mask) { 805 glsl_to_tgsi_instruction *inst; 806 st_dst_reg tmp_dst = st_dst_reg(tmp); 807 808 /* Emit the SCS instruction. 809 */ 810 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); 811 inst->dst.writemask = scs_mask; 812 813 /* Move the result of the SCS instruction to the desired location in 814 * the destination. 
815 */ 816 tmp.swizzle = MAKE_SWIZZLE4(component, component, 817 component, component); 818 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); 819 inst->dst.writemask = this_mask; 820 } else { 821 /* Emit the SCS instruction to write directly to the destination. 822 */ 823 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); 824 inst->dst.writemask = scs_mask; 825 } 826 827 done_mask |= this_mask; 828 } 829} 830 831int 832glsl_to_tgsi_visitor::add_constant(gl_register_file file, 833 gl_constant_value values[4], int size, int datatype, 834 GLuint *swizzle_out) 835{ 836 if (file == PROGRAM_CONSTANT) { 837 return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, 838 size, datatype, swizzle_out); 839 } else { 840 int index = 0; 841 immediate_storage *entry; 842 assert(file == PROGRAM_IMMEDIATE); 843 844 /* Search immediate storage to see if we already have an identical 845 * immediate that we can use instead of adding a duplicate entry. 846 */ 847 foreach_iter(exec_list_iterator, iter, this->immediates) { 848 entry = (immediate_storage *)iter.get(); 849 850 if (entry->size == size && 851 entry->type == datatype && 852 !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { 853 return index; 854 } 855 index++; 856 } 857 858 /* Add this immediate to the list. 
*/ 859 entry = new(mem_ctx) immediate_storage(values, size, datatype); 860 this->immediates.push_tail(entry); 861 this->num_immediates++; 862 return index; 863 } 864} 865 866struct st_src_reg 867glsl_to_tgsi_visitor::st_src_reg_for_float(float val) 868{ 869 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); 870 union gl_constant_value uval; 871 872 uval.f = val; 873 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); 874 875 return src; 876} 877 878struct st_src_reg 879glsl_to_tgsi_visitor::st_src_reg_for_int(int val) 880{ 881 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); 882 union gl_constant_value uval; 883 884 assert(glsl_version >= 130); 885 886 uval.i = val; 887 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); 888 889 return src; 890} 891 892struct st_src_reg 893glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) 894{ 895 if (glsl_version >= 130) 896 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 897 st_src_reg_for_int(val); 898 else 899 return st_src_reg_for_float(val); 900} 901 902static int 903type_size(const struct glsl_type *type) 904{ 905 unsigned int i; 906 int size; 907 908 switch (type->base_type) { 909 case GLSL_TYPE_UINT: 910 case GLSL_TYPE_INT: 911 case GLSL_TYPE_FLOAT: 912 case GLSL_TYPE_BOOL: 913 if (type->is_matrix()) { 914 return type->matrix_columns; 915 } else { 916 /* Regardless of size of vector, it gets a vec4. This is bad 917 * packing for things like floats, but otherwise arrays become a 918 * mess. Hopefully a later pass over the code can pack scalars 919 * down if appropriate. 
920 */ 921 return 1; 922 } 923 case GLSL_TYPE_ARRAY: 924 assert(type->length > 0); 925 return type_size(type->fields.array) * type->length; 926 case GLSL_TYPE_STRUCT: 927 size = 0; 928 for (i = 0; i < type->length; i++) { 929 size += type_size(type->fields.structure[i].type); 930 } 931 return size; 932 case GLSL_TYPE_SAMPLER: 933 /* Samplers take up one slot in UNIFORMS[], but they're baked in 934 * at link time. 935 */ 936 return 1; 937 default: 938 assert(0); 939 return 0; 940 } 941} 942 943/** 944 * In the initial pass of codegen, we assign temporary numbers to 945 * intermediate results. (not SSA -- variable assignments will reuse 946 * storage). 947 */ 948st_src_reg 949glsl_to_tgsi_visitor::get_temp(const glsl_type *type) 950{ 951 st_src_reg src; 952 int swizzle[4]; 953 int i; 954 955 src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT; 956 src.file = PROGRAM_TEMPORARY; 957 src.index = next_temp; 958 src.reladdr = NULL; 959 next_temp += type_size(type); 960 961 if (type->is_array() || type->is_record()) { 962 src.swizzle = SWIZZLE_NOOP; 963 } else { 964 for (i = 0; i < type->vector_elements; i++) 965 swizzle[i] = i; 966 for (; i < 4; i++) 967 swizzle[i] = type->vector_elements - 1; 968 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], 969 swizzle[2], swizzle[3]); 970 } 971 src.negate = 0; 972 973 return src; 974} 975 976variable_storage * 977glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 978{ 979 980 variable_storage *entry; 981 982 foreach_iter(exec_list_iterator, iter, this->variables) { 983 entry = (variable_storage *)iter.get(); 984 985 if (entry->var == var) 986 return entry; 987 } 988 989 return NULL; 990} 991 992void 993glsl_to_tgsi_visitor::visit(ir_variable *ir) 994{ 995 if (strcmp(ir->name, "gl_FragCoord") == 0) { 996 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 997 998 fp->OriginUpperLeft = ir->origin_upper_left; 999 fp->PixelCenterInteger = ir->pixel_center_integer; 1000 1001 } else if 
(strcmp(ir->name, "gl_FragDepth") == 0) { 1002 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 1003 switch (ir->depth_layout) { 1004 case ir_depth_layout_none: 1005 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; 1006 break; 1007 case ir_depth_layout_any: 1008 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; 1009 break; 1010 case ir_depth_layout_greater: 1011 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; 1012 break; 1013 case ir_depth_layout_less: 1014 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; 1015 break; 1016 case ir_depth_layout_unchanged: 1017 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; 1018 break; 1019 default: 1020 assert(0); 1021 break; 1022 } 1023 } 1024 1025 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 1026 unsigned int i; 1027 const ir_state_slot *const slots = ir->state_slots; 1028 assert(ir->state_slots != NULL); 1029 1030 /* Check if this statevar's setup in the STATE file exactly 1031 * matches how we'll want to reference it as a 1032 * struct/array/whatever. If not, then we need to move it into 1033 * temporary storage and hope that it'll get copy-propagated 1034 * out. 1035 */ 1036 for (i = 0; i < ir->num_state_slots; i++) { 1037 if (slots[i].swizzle != SWIZZLE_XYZW) { 1038 break; 1039 } 1040 } 1041 1042 struct variable_storage *storage; 1043 st_dst_reg dst; 1044 if (i == ir->num_state_slots) { 1045 /* We'll set the index later. */ 1046 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 1047 this->variables.push_tail(storage); 1048 1049 dst = undef_dst; 1050 } else { 1051 /* The variable_storage constructor allocates slots based on the size 1052 * of the type. However, this had better match the number of state 1053 * elements that we're going to copy into the new temporary. 
1054 */ 1055 assert((int) ir->num_state_slots == type_size(ir->type)); 1056 1057 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 1058 this->next_temp); 1059 this->variables.push_tail(storage); 1060 this->next_temp += type_size(ir->type); 1061 1062 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, 1063 glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT)); 1064 } 1065 1066 1067 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 1068 int index = _mesa_add_state_reference(this->prog->Parameters, 1069 (gl_state_index *)slots[i].tokens); 1070 1071 if (storage->file == PROGRAM_STATE_VAR) { 1072 if (storage->index == -1) { 1073 storage->index = index; 1074 } else { 1075 assert(index == storage->index + (int)i); 1076 } 1077 } else { 1078 st_src_reg src(PROGRAM_STATE_VAR, index, 1079 glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT); 1080 src.swizzle = slots[i].swizzle; 1081 emit(ir, TGSI_OPCODE_MOV, dst, src); 1082 /* even a float takes up a whole vec4 reg in a struct/array. 
*/ 1083 dst.index++; 1084 } 1085 } 1086 1087 if (storage->file == PROGRAM_TEMPORARY && 1088 dst.index != storage->index + (int) ir->num_state_slots) { 1089 fail_link(this->shader_program, 1090 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1091 ir->name, dst.index - storage->index, 1092 type_size(ir->type)); 1093 } 1094 } 1095} 1096 1097void 1098glsl_to_tgsi_visitor::visit(ir_loop *ir) 1099{ 1100 ir_dereference_variable *counter = NULL; 1101 1102 if (ir->counter != NULL) 1103 counter = new(ir) ir_dereference_variable(ir->counter); 1104 1105 if (ir->from != NULL) { 1106 assert(ir->counter != NULL); 1107 1108 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 1109 1110 a->accept(this); 1111 delete a; 1112 } 1113 1114 emit(NULL, TGSI_OPCODE_BGNLOOP); 1115 1116 if (ir->to) { 1117 ir_expression *e = 1118 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 1119 counter, ir->to); 1120 ir_if *if_stmt = new(ir) ir_if(e); 1121 1122 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 1123 1124 if_stmt->then_instructions.push_tail(brk); 1125 1126 if_stmt->accept(this); 1127 1128 delete if_stmt; 1129 delete e; 1130 delete brk; 1131 } 1132 1133 visit_exec_list(&ir->body_instructions, this); 1134 1135 if (ir->increment) { 1136 ir_expression *e = 1137 new(ir) ir_expression(ir_binop_add, counter->type, 1138 counter, ir->increment); 1139 1140 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 1141 1142 a->accept(this); 1143 delete a; 1144 delete e; 1145 } 1146 1147 emit(NULL, TGSI_OPCODE_ENDLOOP); 1148} 1149 1150void 1151glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 1152{ 1153 switch (ir->mode) { 1154 case ir_loop_jump::jump_break: 1155 emit(NULL, TGSI_OPCODE_BRK); 1156 break; 1157 case ir_loop_jump::jump_continue: 1158 emit(NULL, TGSI_OPCODE_CONT); 1159 break; 1160 } 1161} 1162 1163 1164void 1165glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1166{ 1167 assert(0); 1168 (void)ir; 1169} 1170 1171void 
1172glsl_to_tgsi_visitor::visit(ir_function *ir) 1173{ 1174 /* Ignore function bodies other than main() -- we shouldn't see calls to 1175 * them since they should all be inlined before we get to glsl_to_tgsi. 1176 */ 1177 if (strcmp(ir->name, "main") == 0) { 1178 const ir_function_signature *sig; 1179 exec_list empty; 1180 1181 sig = ir->matching_signature(&empty); 1182 1183 assert(sig); 1184 1185 foreach_iter(exec_list_iterator, iter, sig->body) { 1186 ir_instruction *ir = (ir_instruction *)iter.get(); 1187 1188 ir->accept(this); 1189 } 1190 } 1191} 1192 1193GLboolean 1194glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1195{ 1196 int nonmul_operand = 1 - mul_operand; 1197 st_src_reg a, b, c; 1198 st_dst_reg result_dst; 1199 1200 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1201 if (!expr || expr->operation != ir_binop_mul) 1202 return false; 1203 1204 expr->operands[0]->accept(this); 1205 a = this->result; 1206 expr->operands[1]->accept(this); 1207 b = this->result; 1208 ir->operands[nonmul_operand]->accept(this); 1209 c = this->result; 1210 1211 this->result = get_temp(ir->type); 1212 result_dst = st_dst_reg(this->result); 1213 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1214 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1215 1216 return true; 1217} 1218 1219GLboolean 1220glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) 1221{ 1222 /* Saturates were only introduced to vertex programs in 1223 * NV_vertex_program3, so don't give them to drivers in the VP. 
1224 */ 1225 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) 1226 return false; 1227 1228 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 1229 if (!sat_src) 1230 return false; 1231 1232 sat_src->accept(this); 1233 st_src_reg src = this->result; 1234 1235 this->result = get_temp(ir->type); 1236 st_dst_reg result_dst = st_dst_reg(this->result); 1237 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1238 glsl_to_tgsi_instruction *inst; 1239 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); 1240 inst->saturate = true; 1241 1242 return true; 1243} 1244 1245void 1246glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1247 st_src_reg *reg, int *num_reladdr) 1248{ 1249 if (!reg->reladdr) 1250 return; 1251 1252 emit_arl(ir, address_reg, *reg->reladdr); 1253 1254 if (*num_reladdr != 1) { 1255 st_src_reg temp = get_temp(glsl_type::vec4_type); 1256 1257 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); 1258 *reg = temp; 1259 } 1260 1261 (*num_reladdr)--; 1262} 1263 1264void 1265glsl_to_tgsi_visitor::visit(ir_expression *ir) 1266{ 1267 unsigned int operand; 1268 st_src_reg op[Elements(ir->operands)]; 1269 st_src_reg result_src; 1270 st_dst_reg result_dst; 1271 1272 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) 1273 */ 1274 if (ir->operation == ir_binop_add) { 1275 if (try_emit_mad(ir, 1)) 1276 return; 1277 if (try_emit_mad(ir, 0)) 1278 return; 1279 } 1280 if (try_emit_sat(ir)) 1281 return; 1282 1283 if (ir->operation == ir_quadop_vector) 1284 assert(!"ir_quadop_vector should have been lowered"); 1285 1286 for (operand = 0; operand < ir->get_num_operands(); operand++) { 1287 this->result.file = PROGRAM_UNDEFINED; 1288 ir->operands[operand]->accept(this); 1289 if (this->result.file == PROGRAM_UNDEFINED) { 1290 ir_print_visitor v; 1291 printf("Failed to get tree for expression operand:\n"); 1292 ir->operands[operand]->accept(&v); 1293 exit(1); 1294 } 1295 op[operand] = this->result; 1296 1297 /* Matrix expression operands should have been 
broken down to vector 1298 * operations already. 1299 */ 1300 assert(!ir->operands[operand]->type->is_matrix()); 1301 } 1302 1303 int vector_elements = ir->operands[0]->type->vector_elements; 1304 if (ir->operands[1]) { 1305 vector_elements = MAX2(vector_elements, 1306 ir->operands[1]->type->vector_elements); 1307 } 1308 1309 this->result.file = PROGRAM_UNDEFINED; 1310 1311 /* Storage for our result. Ideally for an assignment we'd be using 1312 * the actual storage for the result here, instead. 1313 */ 1314 result_src = get_temp(ir->type); 1315 /* convenience for the emit functions below. */ 1316 result_dst = st_dst_reg(result_src); 1317 /* Limit writes to the channels that will be used by result_src later. 1318 * This does limit this temp's use as a temporary for multi-instruction 1319 * sequences. 1320 */ 1321 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1322 1323 switch (ir->operation) { 1324 case ir_unop_logic_not: 1325 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); 1326 break; 1327 case ir_unop_neg: 1328 assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); 1329 if (result_dst.type == GLSL_TYPE_INT) 1330 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1331 else { 1332 op[0].negate = ~op[0].negate; 1333 result_src = op[0]; 1334 } 1335 break; 1336 case ir_unop_abs: 1337 assert(result_dst.type == GLSL_TYPE_FLOAT); 1338 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); 1339 break; 1340 case ir_unop_sign: 1341 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); 1342 break; 1343 case ir_unop_rcp: 1344 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); 1345 break; 1346 1347 case ir_unop_exp2: 1348 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); 1349 break; 1350 case ir_unop_exp: 1351 case ir_unop_log: 1352 assert(!"not reached: should be handled by ir_explog_to_explog2"); 1353 break; 1354 case ir_unop_log2: 1355 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); 1356 break; 1357 case 
ir_unop_sin: 1358 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1359 break; 1360 case ir_unop_cos: 1361 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1362 break; 1363 case ir_unop_sin_reduced: 1364 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1365 break; 1366 case ir_unop_cos_reduced: 1367 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1368 break; 1369 1370 case ir_unop_dFdx: 1371 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); 1372 break; 1373 case ir_unop_dFdy: 1374 op[0].negate = ~op[0].negate; 1375 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); 1376 break; 1377 1378 case ir_unop_noise: { 1379 /* At some point, a motivated person could add a better 1380 * implementation of noise. Currently not even the nvidia 1381 * binary drivers do anything more than this. In any case, the 1382 * place to do this is in the GL state tracker, not the poor 1383 * driver. 1384 */ 1385 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); 1386 break; 1387 } 1388 1389 case ir_binop_add: 1390 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1391 break; 1392 case ir_binop_sub: 1393 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); 1394 break; 1395 1396 case ir_binop_mul: 1397 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1398 break; 1399 case ir_binop_div: 1400 if (result_dst.type == GLSL_TYPE_FLOAT) 1401 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1402 else 1403 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); 1404 break; 1405 case ir_binop_mod: 1406 if (result_dst.type == GLSL_TYPE_FLOAT) 1407 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1408 else 1409 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); 1410 break; 1411 1412 case ir_binop_less: 1413 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); 1414 break; 1415 case ir_binop_greater: 1416 emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); 1417 break; 1418 case ir_binop_lequal: 1419 emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], 
op[1]); 1420 break; 1421 case ir_binop_gequal: 1422 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); 1423 break; 1424 case ir_binop_equal: 1425 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1426 break; 1427 case ir_binop_nequal: 1428 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1429 break; 1430 case ir_binop_all_equal: 1431 /* "==" operator producing a scalar boolean. */ 1432 if (ir->operands[0]->type->is_vector() || 1433 ir->operands[1]->type->is_vector()) { 1434 st_src_reg temp = get_temp(glsl_version >= 130 ? 1435 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1436 glsl_type::vec4_type); 1437 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); 1438 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1439 emit_dp(ir, result_dst, temp, temp, vector_elements); 1440 emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); 1441 } else { 1442 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1443 } 1444 break; 1445 case ir_binop_any_nequal: 1446 /* "!=" operator producing a scalar boolean. */ 1447 if (ir->operands[0]->type->is_vector() || 1448 ir->operands[1]->type->is_vector()) { 1449 st_src_reg temp = get_temp(glsl_version >= 130 ? 
1450 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1451 glsl_type::vec4_type); 1452 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); 1453 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1454 emit_dp(ir, result_dst, temp, temp, vector_elements); 1455 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); 1456 } else { 1457 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1458 } 1459 break; 1460 1461 case ir_unop_any: 1462 assert(ir->operands[0]->type->is_vector()); 1463 emit_dp(ir, result_dst, op[0], op[0], 1464 ir->operands[0]->type->vector_elements); 1465 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); 1466 break; 1467 1468 case ir_binop_logic_xor: 1469 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1470 break; 1471 1472 case ir_binop_logic_or: 1473 /* This could be a saturated add and skip the SNE. */ 1474 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1475 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); 1476 break; 1477 1478 case ir_binop_logic_and: 1479 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ 1480 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1481 break; 1482 1483 case ir_binop_dot: 1484 assert(ir->operands[0]->type->is_vector()); 1485 assert(ir->operands[0]->type == ir->operands[1]->type); 1486 emit_dp(ir, result_dst, op[0], op[1], 1487 ir->operands[0]->type->vector_elements); 1488 break; 1489 1490 case ir_unop_sqrt: 1491 /* sqrt(x) = x * rsq(x). */ 1492 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1493 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); 1494 /* For incoming channels <= 0, set the result to 0. 
*/ 1495 op[0].negate = ~op[0].negate; 1496 emit(ir, TGSI_OPCODE_CMP, result_dst, 1497 op[0], result_src, st_src_reg_for_float(0.0)); 1498 break; 1499 case ir_unop_rsq: 1500 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1501 break; 1502 case ir_unop_i2f: 1503 case ir_unop_b2f: 1504 if (glsl_version >= 130) { 1505 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1506 break; 1507 } 1508 case ir_unop_i2u: 1509 case ir_unop_u2i: 1510 /* Converting between signed and unsigned integers is a no-op. */ 1511 case ir_unop_b2i: 1512 /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */ 1513 result_src = op[0]; 1514 break; 1515 case ir_unop_f2i: 1516 if (glsl_version >= 130) 1517 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1518 else 1519 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1520 break; 1521 case ir_unop_f2b: 1522 case ir_unop_i2b: 1523 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 1524 st_src_reg_for_type(result_dst.type, 0)); 1525 break; 1526 case ir_unop_trunc: 1527 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1528 break; 1529 case ir_unop_ceil: 1530 op[0].negate = ~op[0].negate; 1531 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1532 result_src.negate = ~result_src.negate; 1533 break; 1534 case ir_unop_floor: 1535 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1536 break; 1537 case ir_unop_fract: 1538 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 1539 break; 1540 1541 case ir_binop_min: 1542 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 1543 break; 1544 case ir_binop_max: 1545 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 1546 break; 1547 case ir_binop_pow: 1548 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 1549 break; 1550 1551 case ir_unop_bit_not: 1552 if (glsl_version >= 130) { 1553 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1554 break; 1555 } 1556 case ir_unop_u2f: 1557 if (glsl_version >= 130) { 1558 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 1559 break; 1560 } 1561 case ir_binop_lshift: 1562 if 
(glsl_version >= 130) { 1563 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); 1564 break; 1565 } 1566 case ir_binop_rshift: 1567 if (glsl_version >= 130) { 1568 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); 1569 break; 1570 } 1571 case ir_binop_bit_and: 1572 if (glsl_version >= 130) { 1573 emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); 1574 break; 1575 } 1576 case ir_binop_bit_xor: 1577 if (glsl_version >= 130) { 1578 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); 1579 break; 1580 } 1581 case ir_binop_bit_or: 1582 if (glsl_version >= 130) { 1583 emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); 1584 break; 1585 } 1586 case ir_unop_round_even: 1587 assert(!"GLSL 1.30 features unsupported"); 1588 break; 1589 1590 case ir_quadop_vector: 1591 /* This operation should have already been handled. 1592 */ 1593 assert(!"Should not get here."); 1594 break; 1595 } 1596 1597 this->result = result_src; 1598} 1599 1600 1601void 1602glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 1603{ 1604 st_src_reg src; 1605 int i; 1606 int swizzle[4]; 1607 1608 /* Note that this is only swizzles in expressions, not those on the left 1609 * hand side of an assignment, which do write masking. See ir_assignment 1610 * for that. 1611 */ 1612 1613 ir->val->accept(this); 1614 src = this->result; 1615 assert(src.file != PROGRAM_UNDEFINED); 1616 1617 for (i = 0; i < 4; i++) { 1618 if (i < ir->type->vector_elements) { 1619 switch (i) { 1620 case 0: 1621 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1622 break; 1623 case 1: 1624 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1625 break; 1626 case 2: 1627 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1628 break; 1629 case 3: 1630 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1631 break; 1632 } 1633 } else { 1634 /* If the type is smaller than a vec4, replicate the last 1635 * channel out. 
1636 */ 1637 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1638 } 1639 } 1640 1641 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1642 1643 this->result = src; 1644} 1645 1646void 1647glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 1648{ 1649 variable_storage *entry = find_variable_storage(ir->var); 1650 ir_variable *var = ir->var; 1651 1652 if (!entry) { 1653 switch (var->mode) { 1654 case ir_var_uniform: 1655 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1656 var->location); 1657 this->variables.push_tail(entry); 1658 break; 1659 case ir_var_in: 1660 case ir_var_inout: 1661 /* The linker assigns locations for varyings and attributes, 1662 * including deprecated builtins (like gl_Color), user-assign 1663 * generic attributes (glBindVertexLocation), and 1664 * user-defined varyings. 1665 * 1666 * FINISHME: We would hit this path for function arguments. Fix! 1667 */ 1668 assert(var->location != -1); 1669 entry = new(mem_ctx) variable_storage(var, 1670 PROGRAM_INPUT, 1671 var->location); 1672 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && 1673 var->location >= VERT_ATTRIB_GENERIC0) { 1674 _mesa_add_attribute(this->prog->Attributes, 1675 var->name, 1676 _mesa_sizeof_glsl_type(var->type->gl_type), 1677 var->type->gl_type, 1678 var->location - VERT_ATTRIB_GENERIC0); 1679 } 1680 break; 1681 case ir_var_out: 1682 assert(var->location != -1); 1683 entry = new(mem_ctx) variable_storage(var, 1684 PROGRAM_OUTPUT, 1685 var->location); 1686 break; 1687 case ir_var_system_value: 1688 entry = new(mem_ctx) variable_storage(var, 1689 PROGRAM_SYSTEM_VALUE, 1690 var->location); 1691 break; 1692 case ir_var_auto: 1693 case ir_var_temporary: 1694 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1695 this->next_temp); 1696 this->variables.push_tail(entry); 1697 1698 next_temp += type_size(var->type); 1699 break; 1700 } 1701 1702 if (!entry) { 1703 printf("Failed to make storage for %s\n", var->name); 1704 
exit(1); 1705 } 1706 } 1707 1708 this->result = st_src_reg(entry->file, entry->index, var->type); 1709 if (glsl_version <= 120) 1710 this->result.type = GLSL_TYPE_FLOAT; 1711} 1712 1713void 1714glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 1715{ 1716 ir_constant *index; 1717 st_src_reg src; 1718 int element_size = type_size(ir->type); 1719 1720 index = ir->array_index->constant_expression_value(); 1721 1722 ir->array->accept(this); 1723 src = this->result; 1724 1725 if (index) { 1726 src.index += index->value.i[0] * element_size; 1727 } else { 1728 st_src_reg array_base = this->result; 1729 /* Variable index array dereference. It eats the "vec4" of the 1730 * base of the array and an index that offsets the TGSI register 1731 * index. 1732 */ 1733 ir->array_index->accept(this); 1734 1735 st_src_reg index_reg; 1736 1737 if (element_size == 1) { 1738 index_reg = this->result; 1739 } else { 1740 index_reg = get_temp(glsl_type::float_type); 1741 1742 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 1743 this->result, st_src_reg_for_float(element_size)); 1744 } 1745 1746 /* If there was already a relative address register involved, add the 1747 * new and the old together to get the new offset. 1748 */ 1749 if (src.reladdr != NULL) { 1750 st_src_reg accum_reg = get_temp(glsl_type::float_type); 1751 1752 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), 1753 index_reg, *src.reladdr); 1754 1755 index_reg = accum_reg; 1756 } 1757 1758 src.reladdr = ralloc(mem_ctx, st_src_reg); 1759 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1760 } 1761 1762 /* If the type is smaller than a vec4, replicate the last channel out. 
*/ 1763 if (ir->type->is_scalar() || ir->type->is_vector()) 1764 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1765 else 1766 src.swizzle = SWIZZLE_NOOP; 1767 1768 this->result = src; 1769} 1770 1771void 1772glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 1773{ 1774 unsigned int i; 1775 const glsl_type *struct_type = ir->record->type; 1776 int offset = 0; 1777 1778 ir->record->accept(this); 1779 1780 for (i = 0; i < struct_type->length; i++) { 1781 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 1782 break; 1783 offset += type_size(struct_type->fields.structure[i].type); 1784 } 1785 1786 /* If the type is smaller than a vec4, replicate the last channel out. */ 1787 if (ir->type->is_scalar() || ir->type->is_vector()) 1788 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1789 else 1790 this->result.swizzle = SWIZZLE_NOOP; 1791 1792 this->result.index += offset; 1793} 1794 1795/** 1796 * We want to be careful in assignment setup to hit the actual storage 1797 * instead of potentially using a temporary like we might with the 1798 * ir_dereference handler. 1799 */ 1800static st_dst_reg 1801get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) 1802{ 1803 /* The LHS must be a dereference. If the LHS is a variable indexed array 1804 * access of a vector, it must be separated into a series conditional moves 1805 * before reaching this point (see ir_vec_index_to_cond_assign). 1806 */ 1807 assert(ir->as_dereference()); 1808 ir_dereference_array *deref_array = ir->as_dereference_array(); 1809 if (deref_array) { 1810 assert(!deref_array->array->type->is_vector()); 1811 } 1812 1813 /* Use the rvalue deref handler for the most part. We'll ignore 1814 * swizzles in it and write swizzles using writemask, though. 
1815 */ 1816 ir->accept(v); 1817 return st_dst_reg(v->result); 1818} 1819 1820/** 1821 * Process the condition of a conditional assignment 1822 * 1823 * Examines the condition of a conditional assignment to generate the optimal 1824 * first operand of a \c CMP instruction. If the condition is a relational 1825 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 1826 * used as the source for the \c CMP instruction. Otherwise the comparison 1827 * is processed to a boolean result, and the boolean result is used as the 1828 * operand to the CMP instruction. 1829 */ 1830bool 1831glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 1832{ 1833 ir_rvalue *src_ir = ir; 1834 bool negate = true; 1835 bool switch_order = false; 1836 1837 ir_expression *const expr = ir->as_expression(); 1838 if ((expr != NULL) && (expr->get_num_operands() == 2)) { 1839 bool zero_on_left = false; 1840 1841 if (expr->operands[0]->is_zero()) { 1842 src_ir = expr->operands[1]; 1843 zero_on_left = true; 1844 } else if (expr->operands[1]->is_zero()) { 1845 src_ir = expr->operands[0]; 1846 zero_on_left = false; 1847 } 1848 1849 /* a is - 0 + - 0 + 1850 * (a < 0) T F F ( a < 0) T F F 1851 * (0 < a) F F T (-a < 0) F F T 1852 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) 1853 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) 1854 * (a > 0) F F T (-a < 0) F F T 1855 * (0 > a) T F F ( a < 0) T F F 1856 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 1857 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 1858 * 1859 * Note that exchanging the order of 0 and 'a' in the comparison simply 1860 * means that the value of 'a' should be negated. 
1861 */ 1862 if (src_ir != ir) { 1863 switch (expr->operation) { 1864 case ir_binop_less: 1865 switch_order = false; 1866 negate = zero_on_left; 1867 break; 1868 1869 case ir_binop_greater: 1870 switch_order = false; 1871 negate = !zero_on_left; 1872 break; 1873 1874 case ir_binop_lequal: 1875 switch_order = true; 1876 negate = !zero_on_left; 1877 break; 1878 1879 case ir_binop_gequal: 1880 switch_order = true; 1881 negate = zero_on_left; 1882 break; 1883 1884 default: 1885 /* This isn't the right kind of comparison afterall, so make sure 1886 * the whole condition is visited. 1887 */ 1888 src_ir = ir; 1889 break; 1890 } 1891 } 1892 } 1893 1894 src_ir->accept(this); 1895 1896 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 1897 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 1898 * choose which value TGSI_OPCODE_CMP produces without an extra instruction 1899 * computing the condition. 1900 */ 1901 if (negate) 1902 this->result.negate = ~this->result.negate; 1903 1904 return switch_order; 1905} 1906 1907void 1908glsl_to_tgsi_visitor::visit(ir_assignment *ir) 1909{ 1910 st_dst_reg l; 1911 st_src_reg r; 1912 int i; 1913 1914 ir->rhs->accept(this); 1915 r = this->result; 1916 1917 l = get_assignment_lhs(ir->lhs, this); 1918 1919 /* FINISHME: This should really set to the correct maximal writemask for each 1920 * FINISHME: component written (in the loops below). This case can only 1921 * FINISHME: occur for matrices, arrays, and structures. 1922 */ 1923 if (ir->write_mask == 0) { 1924 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 1925 l.writemask = WRITEMASK_XYZW; 1926 } else if (ir->lhs->type->is_scalar() && 1927 ir->lhs->variable_referenced()->mode == ir_var_out) { 1928 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 1929 * FINISHME: W component of fragment shader output zero, work correctly. 
1930 */ 1931 l.writemask = WRITEMASK_XYZW; 1932 } else { 1933 int swizzles[4]; 1934 int first_enabled_chan = 0; 1935 int rhs_chan = 0; 1936 1937 l.writemask = ir->write_mask; 1938 1939 for (int i = 0; i < 4; i++) { 1940 if (l.writemask & (1 << i)) { 1941 first_enabled_chan = GET_SWZ(r.swizzle, i); 1942 break; 1943 } 1944 } 1945 1946 /* Swizzle a small RHS vector into the channels being written. 1947 * 1948 * glsl ir treats write_mask as dictating how many channels are 1949 * present on the RHS while TGSI treats write_mask as just 1950 * showing which channels of the vec4 RHS get written. 1951 */ 1952 for (int i = 0; i < 4; i++) { 1953 if (l.writemask & (1 << i)) 1954 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 1955 else 1956 swizzles[i] = first_enabled_chan; 1957 } 1958 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 1959 swizzles[2], swizzles[3]); 1960 } 1961 1962 assert(l.file != PROGRAM_UNDEFINED); 1963 assert(r.file != PROGRAM_UNDEFINED); 1964 1965 if (ir->condition) { 1966 const bool switch_order = this->process_move_condition(ir->condition); 1967 st_src_reg condition = this->result; 1968 1969 for (i = 0; i < type_size(ir->lhs->type); i++) { 1970 st_src_reg l_src = st_src_reg(l); 1971 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); 1972 1973 if (switch_order) { 1974 emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); 1975 } else { 1976 emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); 1977 } 1978 1979 l.index++; 1980 r.index++; 1981 } 1982 } else if (ir->rhs->as_expression() && 1983 this->instructions.get_tail() && 1984 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 1985 type_size(ir->lhs->type) == 1) { 1986 /* To avoid emitting an extra MOV when assigning an expression to a 1987 * variable, emit the last instruction of the expression again, but 1988 * replace the destination register with the target of the assignment. 1989 * Dead code elimination will remove the original instruction. 
1990 */ 1991 glsl_to_tgsi_instruction *inst; 1992 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 1993 emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); 1994 } else { 1995 for (i = 0; i < type_size(ir->lhs->type); i++) { 1996 emit(ir, TGSI_OPCODE_MOV, l, r); 1997 l.index++; 1998 r.index++; 1999 } 2000 } 2001} 2002 2003 2004void 2005glsl_to_tgsi_visitor::visit(ir_constant *ir) 2006{ 2007 st_src_reg src; 2008 GLfloat stack_vals[4] = { 0 }; 2009 gl_constant_value *values = (gl_constant_value *) stack_vals; 2010 GLenum gl_type = GL_NONE; 2011 unsigned int i; 2012 static int in_array = 0; 2013 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 2014 2015 /* Unfortunately, 4 floats is all we can get into 2016 * _mesa_add_typed_unnamed_constant. So, make a temp to store an 2017 * aggregate constant and move each constant value into it. If we 2018 * get lucky, copy propagation will eliminate the extra moves. 2019 */ 2020 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 2021 st_src_reg temp_base = get_temp(ir->type); 2022 st_dst_reg temp = st_dst_reg(temp_base); 2023 2024 foreach_iter(exec_list_iterator, iter, ir->components) { 2025 ir_constant *field_value = (ir_constant *)iter.get(); 2026 int size = type_size(field_value->type); 2027 2028 assert(size > 0); 2029 2030 field_value->accept(this); 2031 src = this->result; 2032 2033 for (i = 0; i < (unsigned int)size; i++) { 2034 emit(ir, TGSI_OPCODE_MOV, temp, src); 2035 2036 src.index++; 2037 temp.index++; 2038 } 2039 } 2040 this->result = temp_base; 2041 return; 2042 } 2043 2044 if (ir->type->is_array()) { 2045 st_src_reg temp_base = get_temp(ir->type); 2046 st_dst_reg temp = st_dst_reg(temp_base); 2047 int size = type_size(ir->type->fields.array); 2048 2049 assert(size > 0); 2050 in_array++; 2051 2052 for (i = 0; i < ir->type->length; i++) { 2053 ir->array_elements[i]->accept(this); 2054 src = this->result; 2055 for (int j = 0; j < size; j++) { 2056 emit(ir, 
TGSI_OPCODE_MOV, temp, src); 2057 2058 src.index++; 2059 temp.index++; 2060 } 2061 } 2062 this->result = temp_base; 2063 in_array--; 2064 return; 2065 } 2066 2067 if (ir->type->is_matrix()) { 2068 st_src_reg mat = get_temp(ir->type); 2069 st_dst_reg mat_column = st_dst_reg(mat); 2070 2071 for (i = 0; i < ir->type->matrix_columns; i++) { 2072 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 2073 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 2074 2075 src = st_src_reg(file, -1, ir->type->base_type); 2076 src.index = add_constant(file, 2077 values, 2078 ir->type->vector_elements, 2079 GL_FLOAT, 2080 &src.swizzle); 2081 emit(ir, TGSI_OPCODE_MOV, mat_column, src); 2082 2083 mat_column.index++; 2084 } 2085 2086 this->result = mat; 2087 return; 2088 } 2089 2090 switch (ir->type->base_type) { 2091 case GLSL_TYPE_FLOAT: 2092 gl_type = GL_FLOAT; 2093 for (i = 0; i < ir->type->vector_elements; i++) { 2094 values[i].f = ir->value.f[i]; 2095 } 2096 break; 2097 case GLSL_TYPE_UINT: 2098 gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT; 2099 for (i = 0; i < ir->type->vector_elements; i++) { 2100 if (glsl_version >= 130) 2101 values[i].u = ir->value.u[i]; 2102 else 2103 values[i].f = ir->value.u[i]; 2104 } 2105 break; 2106 case GLSL_TYPE_INT: 2107 gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT; 2108 for (i = 0; i < ir->type->vector_elements; i++) { 2109 if (glsl_version >= 130) 2110 values[i].i = ir->value.i[i]; 2111 else 2112 values[i].f = ir->value.i[i]; 2113 } 2114 break; 2115 case GLSL_TYPE_BOOL: 2116 gl_type = glsl_version >= 130 ? 
GL_BOOL : GL_FLOAT; 2117 for (i = 0; i < ir->type->vector_elements; i++) { 2118 if (glsl_version >= 130) 2119 values[i].b = ir->value.b[i]; 2120 else 2121 values[i].f = ir->value.b[i]; 2122 } 2123 break; 2124 default: 2125 assert(!"Non-float/uint/int/bool constant"); 2126 } 2127 2128 this->result = st_src_reg(file, -1, ir->type); 2129 this->result.index = add_constant(file, 2130 values, 2131 ir->type->vector_elements, 2132 gl_type, 2133 &this->result.swizzle); 2134} 2135 2136function_entry * 2137glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) 2138{ 2139 function_entry *entry; 2140 2141 foreach_iter(exec_list_iterator, iter, this->function_signatures) { 2142 entry = (function_entry *)iter.get(); 2143 2144 if (entry->sig == sig) 2145 return entry; 2146 } 2147 2148 entry = ralloc(mem_ctx, function_entry); 2149 entry->sig = sig; 2150 entry->sig_id = this->next_signature_id++; 2151 entry->bgn_inst = NULL; 2152 2153 /* Allocate storage for all the parameters. */ 2154 foreach_iter(exec_list_iterator, iter, sig->parameters) { 2155 ir_variable *param = (ir_variable *)iter.get(); 2156 variable_storage *storage; 2157 2158 storage = find_variable_storage(param); 2159 assert(!storage); 2160 2161 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, 2162 this->next_temp); 2163 this->variables.push_tail(storage); 2164 2165 this->next_temp += type_size(param->type); 2166 } 2167 2168 if (!sig->return_type->is_void()) { 2169 entry->return_reg = get_temp(sig->return_type); 2170 } else { 2171 entry->return_reg = undef_src; 2172 } 2173 2174 this->function_signatures.push_tail(entry); 2175 return entry; 2176} 2177 2178void 2179glsl_to_tgsi_visitor::visit(ir_call *ir) 2180{ 2181 glsl_to_tgsi_instruction *call_inst; 2182 ir_function_signature *sig = ir->get_callee(); 2183 function_entry *entry = get_function_signature(sig); 2184 int i; 2185 2186 /* Process in parameters. 
*/ 2187 exec_list_iterator sig_iter = sig->parameters.iterator(); 2188 foreach_iter(exec_list_iterator, iter, *ir) { 2189 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2190 ir_variable *param = (ir_variable *)sig_iter.get(); 2191 2192 if (param->mode == ir_var_in || 2193 param->mode == ir_var_inout) { 2194 variable_storage *storage = find_variable_storage(param); 2195 assert(storage); 2196 2197 param_rval->accept(this); 2198 st_src_reg r = this->result; 2199 2200 st_dst_reg l; 2201 l.file = storage->file; 2202 l.index = storage->index; 2203 l.reladdr = NULL; 2204 l.writemask = WRITEMASK_XYZW; 2205 l.cond_mask = COND_TR; 2206 2207 for (i = 0; i < type_size(param->type); i++) { 2208 emit(ir, TGSI_OPCODE_MOV, l, r); 2209 l.index++; 2210 r.index++; 2211 } 2212 } 2213 2214 sig_iter.next(); 2215 } 2216 assert(!sig_iter.has_next()); 2217 2218 /* Emit call instruction */ 2219 call_inst = emit(ir, TGSI_OPCODE_CAL); 2220 call_inst->function = entry; 2221 2222 /* Process out parameters. */ 2223 sig_iter = sig->parameters.iterator(); 2224 foreach_iter(exec_list_iterator, iter, *ir) { 2225 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2226 ir_variable *param = (ir_variable *)sig_iter.get(); 2227 2228 if (param->mode == ir_var_out || 2229 param->mode == ir_var_inout) { 2230 variable_storage *storage = find_variable_storage(param); 2231 assert(storage); 2232 2233 st_src_reg r; 2234 r.file = storage->file; 2235 r.index = storage->index; 2236 r.reladdr = NULL; 2237 r.swizzle = SWIZZLE_NOOP; 2238 r.negate = 0; 2239 2240 param_rval->accept(this); 2241 st_dst_reg l = st_dst_reg(this->result); 2242 2243 for (i = 0; i < type_size(param->type); i++) { 2244 emit(ir, TGSI_OPCODE_MOV, l, r); 2245 l.index++; 2246 r.index++; 2247 } 2248 } 2249 2250 sig_iter.next(); 2251 } 2252 assert(!sig_iter.has_next()); 2253 2254 /* Process return value. 
*/ 2255 this->result = entry->return_reg; 2256} 2257 2258void 2259glsl_to_tgsi_visitor::visit(ir_texture *ir) 2260{ 2261 st_src_reg result_src, coord, lod_info, projector, dx, dy; 2262 st_dst_reg result_dst, coord_dst; 2263 glsl_to_tgsi_instruction *inst = NULL; 2264 unsigned opcode = TGSI_OPCODE_NOP; 2265 2266 ir->coordinate->accept(this); 2267 2268 /* Put our coords in a temp. We'll need to modify them for shadow, 2269 * projection, or LOD, so the only case we'd use it as is is if 2270 * we're doing plain old texturing. The optimization passes on 2271 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 2272 */ 2273 coord = get_temp(glsl_type::vec4_type); 2274 coord_dst = st_dst_reg(coord); 2275 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2276 2277 if (ir->projector) { 2278 ir->projector->accept(this); 2279 projector = this->result; 2280 } 2281 2282 /* Storage for our result. Ideally for an assignment we'd be using 2283 * the actual storage for the result here, instead. 2284 */ 2285 result_src = get_temp(glsl_type::vec4_type); 2286 result_dst = st_dst_reg(result_src); 2287 2288 switch (ir->op) { 2289 case ir_tex: 2290 opcode = TGSI_OPCODE_TEX; 2291 break; 2292 case ir_txb: 2293 opcode = TGSI_OPCODE_TXB; 2294 ir->lod_info.bias->accept(this); 2295 lod_info = this->result; 2296 break; 2297 case ir_txl: 2298 opcode = TGSI_OPCODE_TXL; 2299 ir->lod_info.lod->accept(this); 2300 lod_info = this->result; 2301 break; 2302 case ir_txd: 2303 opcode = TGSI_OPCODE_TXD; 2304 ir->lod_info.grad.dPdx->accept(this); 2305 dx = this->result; 2306 ir->lod_info.grad.dPdy->accept(this); 2307 dy = this->result; 2308 break; 2309 case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */ 2310 assert(!"GLSL 1.30 features unsupported"); 2311 break; 2312 } 2313 2314 if (ir->projector) { 2315 if (opcode == TGSI_OPCODE_TEX) { 2316 /* Slot the projector in as the last component of the coord. 
*/ 2317 coord_dst.writemask = WRITEMASK_W; 2318 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); 2319 coord_dst.writemask = WRITEMASK_XYZW; 2320 opcode = TGSI_OPCODE_TXP; 2321 } else { 2322 st_src_reg coord_w = coord; 2323 coord_w.swizzle = SWIZZLE_WWWW; 2324 2325 /* For the other TEX opcodes there's no projective version 2326 * since the last slot is taken up by LOD info. Do the 2327 * projective divide now. 2328 */ 2329 coord_dst.writemask = WRITEMASK_W; 2330 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); 2331 2332 /* In the case where we have to project the coordinates "by hand," 2333 * the shadow comparator value must also be projected. 2334 */ 2335 st_src_reg tmp_src = coord; 2336 if (ir->shadow_comparitor) { 2337 /* Slot the shadow value in as the second to last component of the 2338 * coord. 2339 */ 2340 ir->shadow_comparitor->accept(this); 2341 2342 tmp_src = get_temp(glsl_type::vec4_type); 2343 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 2344 2345 tmp_dst.writemask = WRITEMASK_Z; 2346 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 2347 2348 tmp_dst.writemask = WRITEMASK_XY; 2349 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 2350 } 2351 2352 coord_dst.writemask = WRITEMASK_XYZ; 2353 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 2354 2355 coord_dst.writemask = WRITEMASK_XYZW; 2356 coord.swizzle = SWIZZLE_XYZW; 2357 } 2358 } 2359 2360 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 2361 * comparator was put in the correct place (and projected) by the code, 2362 * above, that handles by-hand projection. 2363 */ 2364 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 2365 /* Slot the shadow value in as the second to last component of the 2366 * coord. 
2367 */ 2368 ir->shadow_comparitor->accept(this); 2369 coord_dst.writemask = WRITEMASK_Z; 2370 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2371 coord_dst.writemask = WRITEMASK_XYZW; 2372 } 2373 2374 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) { 2375 /* TGSI stores LOD or LOD bias in the last channel of the coords. */ 2376 coord_dst.writemask = WRITEMASK_W; 2377 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 2378 coord_dst.writemask = WRITEMASK_XYZW; 2379 } 2380 2381 if (opcode == TGSI_OPCODE_TXD) 2382 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2383 else 2384 inst = emit(ir, opcode, result_dst, coord); 2385 2386 if (ir->shadow_comparitor) 2387 inst->tex_shadow = GL_TRUE; 2388 2389 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2390 this->shader_program, 2391 this->prog); 2392 2393 const glsl_type *sampler_type = ir->sampler->type; 2394 2395 switch (sampler_type->sampler_dimensionality) { 2396 case GLSL_SAMPLER_DIM_1D: 2397 inst->tex_target = (sampler_type->sampler_array) 2398 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2399 break; 2400 case GLSL_SAMPLER_DIM_2D: 2401 inst->tex_target = (sampler_type->sampler_array) 2402 ? 
TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2403 break; 2404 case GLSL_SAMPLER_DIM_3D: 2405 inst->tex_target = TEXTURE_3D_INDEX; 2406 break; 2407 case GLSL_SAMPLER_DIM_CUBE: 2408 inst->tex_target = TEXTURE_CUBE_INDEX; 2409 break; 2410 case GLSL_SAMPLER_DIM_RECT: 2411 inst->tex_target = TEXTURE_RECT_INDEX; 2412 break; 2413 case GLSL_SAMPLER_DIM_BUF: 2414 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2415 break; 2416 default: 2417 assert(!"Should not get here."); 2418 } 2419 2420 this->result = result_src; 2421} 2422 2423void 2424glsl_to_tgsi_visitor::visit(ir_return *ir) 2425{ 2426 if (ir->get_value()) { 2427 st_dst_reg l; 2428 int i; 2429 2430 assert(current_function); 2431 2432 ir->get_value()->accept(this); 2433 st_src_reg r = this->result; 2434 2435 l = st_dst_reg(current_function->return_reg); 2436 2437 for (i = 0; i < type_size(current_function->sig->return_type); i++) { 2438 emit(ir, TGSI_OPCODE_MOV, l, r); 2439 l.index++; 2440 r.index++; 2441 } 2442 } 2443 2444 emit(ir, TGSI_OPCODE_RET); 2445} 2446 2447void 2448glsl_to_tgsi_visitor::visit(ir_discard *ir) 2449{ 2450 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 2451 2452 if (ir->condition) { 2453 ir->condition->accept(this); 2454 this->result.negate = ~this->result.negate; 2455 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); 2456 } else { 2457 emit(ir, TGSI_OPCODE_KILP); 2458 } 2459 2460 fp->UsesKill = GL_TRUE; 2461} 2462 2463void 2464glsl_to_tgsi_visitor::visit(ir_if *ir) 2465{ 2466 glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL; 2467 glsl_to_tgsi_instruction *prev_inst; 2468 2469 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2470 2471 ir->condition->accept(this); 2472 assert(this->result.file != PROGRAM_UNDEFINED); 2473 2474 if (this->options->EmitCondCodes) { 2475 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2476 2477 /* See if we actually generated any instruction for generating 2478 * the 
condition. If not, then cook up a move to a temp so we 2479 * have something to set cond_update on. 2480 */ 2481 if (cond_inst == prev_inst) { 2482 st_src_reg temp = get_temp(glsl_type::bool_type); 2483 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); 2484 } 2485 cond_inst->cond_update = GL_TRUE; 2486 2487 if_inst = emit(ir->condition, TGSI_OPCODE_IF); 2488 if_inst->dst.cond_mask = COND_NE; 2489 } else { 2490 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); 2491 } 2492 2493 this->instructions.push_tail(if_inst); 2494 2495 visit_exec_list(&ir->then_instructions, this); 2496 2497 if (!ir->else_instructions.is_empty()) { 2498 else_inst = emit(ir->condition, TGSI_OPCODE_ELSE); 2499 visit_exec_list(&ir->else_instructions, this); 2500 } 2501 2502 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); 2503} 2504 2505glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 2506{ 2507 result.file = PROGRAM_UNDEFINED; 2508 next_temp = 1; 2509 next_signature_id = 1; 2510 num_immediates = 0; 2511 current_function = NULL; 2512 num_address_regs = 0; 2513 indirect_addr_temps = false; 2514 indirect_addr_consts = false; 2515 mem_ctx = ralloc_context(NULL); 2516} 2517 2518glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 2519{ 2520 ralloc_free(mem_ctx); 2521} 2522 2523extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 2524{ 2525 delete v; 2526} 2527 2528 2529/** 2530 * Count resources used by the given gpu program (number of texture 2531 * samplers, etc). 
2532 */ 2533static void 2534count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2535{ 2536 v->samplers_used = 0; 2537 2538 foreach_iter(exec_list_iterator, iter, v->instructions) { 2539 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2540 2541 if (is_tex_instruction(inst->op)) { 2542 v->samplers_used |= 1 << inst->sampler; 2543 2544 prog->SamplerTargets[inst->sampler] = 2545 (gl_texture_index)inst->tex_target; 2546 if (inst->tex_shadow) { 2547 prog->ShadowSamplers |= 1 << inst->sampler; 2548 } 2549 } 2550 } 2551 2552 prog->SamplersUsed = v->samplers_used; 2553 _mesa_update_shader_textures_used(prog); 2554} 2555 2556 2557/** 2558 * Check if the given vertex/fragment/shader program is within the 2559 * resource limits of the context (number of texture units, etc). 2560 * If any of those checks fail, record a linker error. 2561 * 2562 * XXX more checks are needed... 2563 */ 2564static void 2565check_resources(const struct gl_context *ctx, 2566 struct gl_shader_program *shader_program, 2567 glsl_to_tgsi_visitor *prog, 2568 struct gl_program *proginfo) 2569{ 2570 switch (proginfo->Target) { 2571 case GL_VERTEX_PROGRAM_ARB: 2572 if (_mesa_bitcount(prog->samplers_used) > 2573 ctx->Const.MaxVertexTextureImageUnits) { 2574 fail_link(shader_program, "Too many vertex shader texture samplers"); 2575 } 2576 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { 2577 fail_link(shader_program, "Too many vertex shader constants"); 2578 } 2579 break; 2580 case MESA_GEOMETRY_PROGRAM: 2581 if (_mesa_bitcount(prog->samplers_used) > 2582 ctx->Const.MaxGeometryTextureImageUnits) { 2583 fail_link(shader_program, "Too many geometry shader texture samplers"); 2584 } 2585 if (proginfo->Parameters->NumParameters > 2586 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { 2587 fail_link(shader_program, "Too many geometry shader constants"); 2588 } 2589 break; 2590 case GL_FRAGMENT_PROGRAM_ARB: 2591 if (_mesa_bitcount(prog->samplers_used) > 2592 
ctx->Const.MaxTextureImageUnits) { 2593 fail_link(shader_program, "Too many fragment shader texture samplers"); 2594 } 2595 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { 2596 fail_link(shader_program, "Too many fragment shader constants"); 2597 } 2598 break; 2599 default: 2600 _mesa_problem(ctx, "unexpected program type in check_resources()"); 2601 } 2602} 2603 2604 2605 2606struct uniform_sort { 2607 struct gl_uniform *u; 2608 int pos; 2609}; 2610 2611/* The shader_program->Uniforms list is almost sorted in increasing 2612 * uniform->{Frag,Vert}Pos locations, but not quite when there are 2613 * uniforms shared between targets. We need to add parameters in 2614 * increasing order for the targets. 2615 */ 2616static int 2617sort_uniforms(const void *a, const void *b) 2618{ 2619 struct uniform_sort *u1 = (struct uniform_sort *)a; 2620 struct uniform_sort *u2 = (struct uniform_sort *)b; 2621 2622 return u1->pos - u2->pos; 2623} 2624 2625/* Add the uniforms to the parameters. The linker chose locations 2626 * in our parameters lists (which weren't created yet), which the 2627 * uniforms code will use to poke values into our parameters list 2628 * when uniforms are updated. 
2629 */ 2630static void 2631add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, 2632 struct gl_shader *shader, 2633 struct gl_program *prog) 2634{ 2635 unsigned int i; 2636 unsigned int next_sampler = 0, num_uniforms = 0; 2637 struct uniform_sort *sorted_uniforms; 2638 2639 sorted_uniforms = ralloc_array(NULL, struct uniform_sort, 2640 shader_program->Uniforms->NumUniforms); 2641 2642 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { 2643 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; 2644 int parameter_index = -1; 2645 2646 switch (shader->Type) { 2647 case GL_VERTEX_SHADER: 2648 parameter_index = uniform->VertPos; 2649 break; 2650 case GL_FRAGMENT_SHADER: 2651 parameter_index = uniform->FragPos; 2652 break; 2653 case GL_GEOMETRY_SHADER: 2654 parameter_index = uniform->GeomPos; 2655 break; 2656 } 2657 2658 /* Only add uniforms used in our target. */ 2659 if (parameter_index != -1) { 2660 sorted_uniforms[num_uniforms].pos = parameter_index; 2661 sorted_uniforms[num_uniforms].u = uniform; 2662 num_uniforms++; 2663 } 2664 } 2665 2666 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), 2667 sort_uniforms); 2668 2669 for (i = 0; i < num_uniforms; i++) { 2670 struct gl_uniform *uniform = sorted_uniforms[i].u; 2671 int parameter_index = sorted_uniforms[i].pos; 2672 const glsl_type *type = uniform->Type; 2673 unsigned int size; 2674 2675 if (type->is_vector() || 2676 type->is_scalar()) { 2677 size = type->vector_elements; 2678 } else { 2679 size = type_size(type) * 4; 2680 } 2681 2682 gl_register_file file; 2683 if (type->is_sampler() || 2684 (type->is_array() && type->fields.array->is_sampler())) { 2685 file = PROGRAM_SAMPLER; 2686 } else { 2687 file = PROGRAM_UNIFORM; 2688 } 2689 2690 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, 2691 uniform->Name); 2692 2693 if (index < 0) { 2694 index = _mesa_add_parameter(prog->Parameters, file, 2695 uniform->Name, size, type->gl_type, 2696 
NULL, NULL, 0x0); 2697 2698 /* Sampler uniform values are stored in prog->SamplerUnits, 2699 * and the entry in that array is selected by this index we 2700 * store in ParameterValues[]. 2701 */ 2702 if (file == PROGRAM_SAMPLER) { 2703 for (unsigned int j = 0; j < size / 4; j++) 2704 prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; 2705 } 2706 2707 /* The location chosen in the Parameters list here (returned 2708 * from _mesa_add_uniform) has to match what the linker chose. 2709 */ 2710 if (index != parameter_index) { 2711 fail_link(shader_program, "Allocation of uniform `%s' to target " 2712 "failed (%d vs %d)\n", 2713 uniform->Name, index, parameter_index); 2714 } 2715 } 2716 } 2717 2718 ralloc_free(sorted_uniforms); 2719} 2720 2721static void 2722set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 2723 struct gl_shader_program *shader_program, 2724 const char *name, const glsl_type *type, 2725 ir_constant *val) 2726{ 2727 if (type->is_record()) { 2728 ir_constant *field_constant; 2729 2730 field_constant = (ir_constant *)val->components.get_head(); 2731 2732 for (unsigned int i = 0; i < type->length; i++) { 2733 const glsl_type *field_type = type->fields.structure[i].type; 2734 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 2735 type->fields.structure[i].name); 2736 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 2737 field_type, field_constant); 2738 field_constant = (ir_constant *)field_constant->next; 2739 } 2740 return; 2741 } 2742 2743 int loc = _mesa_get_uniform_location(ctx, shader_program, name); 2744 2745 if (loc == -1) { 2746 fail_link(shader_program, 2747 "Couldn't find uniform for initializer %s\n", name); 2748 return; 2749 } 2750 2751 for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { 2752 ir_constant *element; 2753 const glsl_type *element_type; 2754 if (type->is_array()) { 2755 element = val->array_elements[i]; 2756 element_type = type->fields.array; 2757 } else { 2758 element = val; 2759 element_type = type; 2760 } 2761 2762 void *values; 2763 2764 if (element_type->base_type == GLSL_TYPE_BOOL) { 2765 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 2766 for (unsigned int j = 0; j < element_type->components(); j++) { 2767 conv[j] = element->value.b[j]; 2768 } 2769 values = (void *)conv; 2770 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 2771 element_type->vector_elements, 2772 1); 2773 } else { 2774 values = &element->value; 2775 } 2776 2777 if (element_type->is_matrix()) { 2778 _mesa_uniform_matrix(ctx, shader_program, 2779 element_type->matrix_columns, 2780 element_type->vector_elements, 2781 loc, 1, GL_FALSE, (GLfloat *)values); 2782 loc += element_type->matrix_columns; 2783 } else { 2784 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 2785 values, element_type->gl_type); 2786 loc += type_size(element_type); 2787 } 2788 } 2789} 2790 2791static void 2792set_uniform_initializers(struct gl_context *ctx, 2793 struct gl_shader_program *shader_program) 2794{ 2795 void *mem_ctx = NULL; 2796 2797 for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) { 2798 struct gl_shader *shader = shader_program->_LinkedShaders[i]; 2799 2800 if (shader == NULL) 2801 continue; 2802 2803 foreach_iter(exec_list_iterator, iter, *shader->ir) { 2804 ir_instruction *ir = (ir_instruction *)iter.get(); 2805 ir_variable *var = ir->as_variable(); 2806 2807 if (!var || var->mode != ir_var_uniform || !var->constant_value) 2808 continue; 2809 2810 if (!mem_ctx) 2811 mem_ctx = ralloc_context(NULL); 2812 2813 set_uniform_initializer(ctx, mem_ctx, shader_program, var->name, 2814 var->type, var->constant_value); 2815 } 2816 } 2817 2818 ralloc_free(mem_ctx); 2819} 2820 2821/* 2822 * Scan/rewrite program to 
remove reads of custom (output) registers. 2823 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING 2824 * (for vertex shaders). 2825 * In GLSL shaders, varying vars can be read and written. 2826 * On some hardware, trying to read an output register causes trouble. 2827 * So, rewrite the program to use a temporary register in this case. 2828 * 2829 * Based on _mesa_remove_output_reads from programopt.c. 2830 */ 2831void 2832glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) 2833{ 2834 GLuint i; 2835 GLint outputMap[VERT_RESULT_MAX]; 2836 GLint outputTypes[VERT_RESULT_MAX]; 2837 GLuint numVaryingReads = 0; 2838 GLboolean usedTemps[MAX_TEMPS]; 2839 GLuint firstTemp = 0; 2840 2841 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, 2842 usedTemps, MAX_TEMPS); 2843 2844 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); 2845 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); 2846 2847 for (i = 0; i < VERT_RESULT_MAX; i++) 2848 outputMap[i] = -1; 2849 2850 /* look for instructions which read from varying vars */ 2851 foreach_iter(exec_list_iterator, iter, this->instructions) { 2852 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2853 const GLuint numSrc = num_inst_src_regs(inst->op); 2854 GLuint j; 2855 for (j = 0; j < numSrc; j++) { 2856 if (inst->src[j].file == type) { 2857 /* replace the read with a temp reg */ 2858 const GLuint var = inst->src[j].index; 2859 if (outputMap[var] == -1) { 2860 numVaryingReads++; 2861 outputMap[var] = _mesa_find_free_register(usedTemps, 2862 MAX_TEMPS, 2863 firstTemp); 2864 outputTypes[var] = inst->src[j].type; 2865 firstTemp = outputMap[var] + 1; 2866 } 2867 inst->src[j].file = PROGRAM_TEMPORARY; 2868 inst->src[j].index = outputMap[var]; 2869 } 2870 } 2871 } 2872 2873 if (numVaryingReads == 0) 2874 return; /* nothing to be done */ 2875 2876 /* look for instructions which write to the varying vars identified above */ 2877 
foreach_iter(exec_list_iterator, iter, this->instructions) { 2878 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2879 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { 2880 /* change inst to write to the temp reg, instead of the varying */ 2881 inst->dst.file = PROGRAM_TEMPORARY; 2882 inst->dst.index = outputMap[inst->dst.index]; 2883 } 2884 } 2885 2886 /* insert new MOV instructions at the end */ 2887 for (i = 0; i < VERT_RESULT_MAX; i++) { 2888 if (outputMap[i] >= 0) { 2889 /* MOV VAR[i], TEMP[tmp]; */ 2890 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); 2891 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); 2892 dst.index = i; 2893 this->emit(NULL, TGSI_OPCODE_MOV, dst, src); 2894 } 2895 } 2896} 2897 2898/** 2899 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 2900 * are read from the given src in this instruction 2901 */ 2902static int 2903get_src_arg_mask(st_dst_reg dst, st_src_reg src) 2904{ 2905 int read_mask = 0, comp; 2906 2907 /* Now, given the src swizzle and the written channels, find which 2908 * components are actually read 2909 */ 2910 for (comp = 0; comp < 4; ++comp) { 2911 const unsigned coord = GET_SWZ(src.swizzle, comp); 2912 ASSERT(coord < 4); 2913 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 2914 read_mask |= 1 << coord; 2915 } 2916 2917 return read_mask; 2918} 2919 2920/** 2921 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 2922 * instruction is the first instruction to write to register T0. There are 2923 * several lowering passes done in GLSL IR (e.g. branches and 2924 * relative addressing) that create a large number of conditional assignments 2925 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 
2926 * 2927 * Here is why this conversion is safe: 2928 * CMP T0, T1 T2 T0 can be expanded to: 2929 * if (T1 < 0.0) 2930 * MOV T0, T2; 2931 * else 2932 * MOV T0, T0; 2933 * 2934 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 2935 * as the original program. If (T1 < 0.0) evaluates to false, executing 2936 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 2937 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 2938 * because any instruction that was going to read from T0 after this was going 2939 * to read a garbage value anyway. 2940 */ 2941void 2942glsl_to_tgsi_visitor::simplify_cmp(void) 2943{ 2944 unsigned tempWrites[MAX_TEMPS]; 2945 unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; 2946 2947 memset(tempWrites, 0, sizeof(tempWrites)); 2948 memset(outputWrites, 0, sizeof(outputWrites)); 2949 2950 foreach_iter(exec_list_iterator, iter, this->instructions) { 2951 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2952 unsigned prevWriteMask = 0; 2953 2954 /* Give up if we encounter relative addressing or flow control. */ 2955 if (inst->dst.reladdr || 2956 tgsi_get_opcode_info(inst->op)->is_branch || 2957 inst->op == TGSI_OPCODE_BGNSUB || 2958 inst->op == TGSI_OPCODE_CONT || 2959 inst->op == TGSI_OPCODE_END || 2960 inst->op == TGSI_OPCODE_ENDSUB || 2961 inst->op == TGSI_OPCODE_RET) { 2962 return; 2963 } 2964 2965 if (inst->dst.file == PROGRAM_OUTPUT) { 2966 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); 2967 prevWriteMask = outputWrites[inst->dst.index]; 2968 outputWrites[inst->dst.index] |= inst->dst.writemask; 2969 } else if (inst->dst.file == PROGRAM_TEMPORARY) { 2970 assert(inst->dst.index < MAX_TEMPS); 2971 prevWriteMask = tempWrites[inst->dst.index]; 2972 tempWrites[inst->dst.index] |= inst->dst.writemask; 2973 } 2974 2975 /* For a CMP to be considered a conditional write, the destination 2976 * register and source register two must be the same. 
*/ 2977 if (inst->op == TGSI_OPCODE_CMP 2978 && !(inst->dst.writemask & prevWriteMask) 2979 && inst->src[2].file == inst->dst.file 2980 && inst->src[2].index == inst->dst.index 2981 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { 2982 2983 inst->op = TGSI_OPCODE_MOV; 2984 inst->src[0] = inst->src[1]; 2985 } 2986 } 2987} 2988 2989/* Replaces all references to a temporary register index with another index. */ 2990void 2991glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) 2992{ 2993 foreach_iter(exec_list_iterator, iter, this->instructions) { 2994 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2995 unsigned j; 2996 2997 for (j=0; j < num_inst_src_regs(inst->op); j++) { 2998 if (inst->src[j].file == PROGRAM_TEMPORARY && 2999 inst->src[j].index == index) { 3000 inst->src[j].index = new_index; 3001 } 3002 } 3003 3004 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3005 inst->dst.index = new_index; 3006 } 3007 } 3008} 3009 3010int 3011glsl_to_tgsi_visitor::get_first_temp_read(int index) 3012{ 3013 int depth = 0; /* loop depth */ 3014 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3015 unsigned i = 0, j; 3016 3017 foreach_iter(exec_list_iterator, iter, this->instructions) { 3018 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3019 3020 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3021 if (inst->src[j].file == PROGRAM_TEMPORARY && 3022 inst->src[j].index == index) { 3023 return (depth == 0) ? 
i : loop_start; 3024 } 3025 } 3026 3027 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3028 if(depth++ == 0) 3029 loop_start = i; 3030 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3031 if (--depth == 0) 3032 loop_start = -1; 3033 } 3034 assert(depth >= 0); 3035 3036 i++; 3037 } 3038 3039 return -1; 3040} 3041 3042int 3043glsl_to_tgsi_visitor::get_first_temp_write(int index) 3044{ 3045 int depth = 0; /* loop depth */ 3046 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3047 int i = 0; 3048 3049 foreach_iter(exec_list_iterator, iter, this->instructions) { 3050 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3051 3052 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3053 return (depth == 0) ? i : loop_start; 3054 } 3055 3056 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3057 if(depth++ == 0) 3058 loop_start = i; 3059 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3060 if (--depth == 0) 3061 loop_start = -1; 3062 } 3063 assert(depth >= 0); 3064 3065 i++; 3066 } 3067 3068 return -1; 3069} 3070 3071int 3072glsl_to_tgsi_visitor::get_last_temp_read(int index) 3073{ 3074 int depth = 0; /* loop depth */ 3075 int last = -1; /* index of last instruction that reads the temporary */ 3076 unsigned i = 0, j; 3077 3078 foreach_iter(exec_list_iterator, iter, this->instructions) { 3079 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3080 3081 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3082 if (inst->src[j].file == PROGRAM_TEMPORARY && 3083 inst->src[j].index == index) { 3084 last = (depth == 0) ? 
i : -2; 3085 } 3086 } 3087 3088 if (inst->op == TGSI_OPCODE_BGNLOOP) 3089 depth++; 3090 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3091 if (--depth == 0 && last == -2) 3092 last = i; 3093 assert(depth >= 0); 3094 3095 i++; 3096 } 3097 3098 assert(last >= -1); 3099 return last; 3100} 3101 3102int 3103glsl_to_tgsi_visitor::get_last_temp_write(int index) 3104{ 3105 int depth = 0; /* loop depth */ 3106 int last = -1; /* index of last instruction that writes to the temporary */ 3107 int i = 0; 3108 3109 foreach_iter(exec_list_iterator, iter, this->instructions) { 3110 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3111 3112 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) 3113 last = (depth == 0) ? i : -2; 3114 3115 if (inst->op == TGSI_OPCODE_BGNLOOP) 3116 depth++; 3117 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3118 if (--depth == 0 && last == -2) 3119 last = i; 3120 assert(depth >= 0); 3121 3122 i++; 3123 } 3124 3125 assert(last >= -1); 3126 return last; 3127} 3128 3129/* 3130 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 3131 * channels for copy propagation and updates following instructions to 3132 * use the original versions. 3133 * 3134 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3135 * will occur. As an example, a TXP production before this pass: 3136 * 3137 * 0: MOV TEMP[1], INPUT[4].xyyy; 3138 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3139 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 3140 * 3141 * and after: 3142 * 3143 * 0: MOV TEMP[1], INPUT[4].xyyy; 3144 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3145 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3146 * 3147 * which allows for dead code elimination on TEMP[1]'s writes. 
3148 */ 3149void 3150glsl_to_tgsi_visitor::copy_propagate(void) 3151{ 3152 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 3153 glsl_to_tgsi_instruction *, 3154 this->next_temp * 4); 3155 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3156 int level = 0; 3157 3158 foreach_iter(exec_list_iterator, iter, this->instructions) { 3159 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3160 3161 assert(inst->dst.file != PROGRAM_TEMPORARY 3162 || inst->dst.index < this->next_temp); 3163 3164 /* First, do any copy propagation possible into the src regs. */ 3165 for (int r = 0; r < 3; r++) { 3166 glsl_to_tgsi_instruction *first = NULL; 3167 bool good = true; 3168 int acp_base = inst->src[r].index * 4; 3169 3170 if (inst->src[r].file != PROGRAM_TEMPORARY || 3171 inst->src[r].reladdr) 3172 continue; 3173 3174 /* See if we can find entries in the ACP consisting of MOVs 3175 * from the same src register for all the swizzled channels 3176 * of this src register reference. 3177 */ 3178 for (int i = 0; i < 4; i++) { 3179 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3180 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 3181 3182 if (!copy_chan) { 3183 good = false; 3184 break; 3185 } 3186 3187 assert(acp_level[acp_base + src_chan] <= level); 3188 3189 if (!first) { 3190 first = copy_chan; 3191 } else { 3192 if (first->src[0].file != copy_chan->src[0].file || 3193 first->src[0].index != copy_chan->src[0].index) { 3194 good = false; 3195 break; 3196 } 3197 } 3198 } 3199 3200 if (good) { 3201 /* We've now validated that we can copy-propagate to 3202 * replace this src register reference. Do it. 
3203 */ 3204 inst->src[r].file = first->src[0].file; 3205 inst->src[r].index = first->src[0].index; 3206 3207 int swizzle = 0; 3208 for (int i = 0; i < 4; i++) { 3209 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3210 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 3211 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 3212 (3 * i)); 3213 } 3214 inst->src[r].swizzle = swizzle; 3215 } 3216 } 3217 3218 switch (inst->op) { 3219 case TGSI_OPCODE_BGNLOOP: 3220 case TGSI_OPCODE_ENDLOOP: 3221 /* End of a basic block, clear the ACP entirely. */ 3222 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3223 break; 3224 3225 case TGSI_OPCODE_IF: 3226 ++level; 3227 break; 3228 3229 case TGSI_OPCODE_ENDIF: 3230 case TGSI_OPCODE_ELSE: 3231 /* Clear all channels written inside the block from the ACP, but 3232 * leaving those that were not touched. 3233 */ 3234 for (int r = 0; r < this->next_temp; r++) { 3235 for (int c = 0; c < 4; c++) { 3236 if (!acp[4 * r + c]) 3237 continue; 3238 3239 if (acp_level[4 * r + c] >= level) 3240 acp[4 * r + c] = NULL; 3241 } 3242 } 3243 if (inst->op == TGSI_OPCODE_ENDIF) 3244 --level; 3245 break; 3246 3247 default: 3248 /* Continuing the block, clear any written channels from 3249 * the ACP. 3250 */ 3251 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 3252 /* Any temporary might be written, so no copy propagation 3253 * across this instruction. 3254 */ 3255 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3256 } else if (inst->dst.file == PROGRAM_OUTPUT && 3257 inst->dst.reladdr) { 3258 /* Any output might be written, so no copy propagation 3259 * from outputs across this instruction. 
3260 */ 3261 for (int r = 0; r < this->next_temp; r++) { 3262 for (int c = 0; c < 4; c++) { 3263 if (!acp[4 * r + c]) 3264 continue; 3265 3266 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 3267 acp[4 * r + c] = NULL; 3268 } 3269 } 3270 } else if (inst->dst.file == PROGRAM_TEMPORARY || 3271 inst->dst.file == PROGRAM_OUTPUT) { 3272 /* Clear where it's used as dst. */ 3273 if (inst->dst.file == PROGRAM_TEMPORARY) { 3274 for (int c = 0; c < 4; c++) { 3275 if (inst->dst.writemask & (1 << c)) { 3276 acp[4 * inst->dst.index + c] = NULL; 3277 } 3278 } 3279 } 3280 3281 /* Clear where it's used as src. */ 3282 for (int r = 0; r < this->next_temp; r++) { 3283 for (int c = 0; c < 4; c++) { 3284 if (!acp[4 * r + c]) 3285 continue; 3286 3287 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 3288 3289 if (acp[4 * r + c]->src[0].file == inst->dst.file && 3290 acp[4 * r + c]->src[0].index == inst->dst.index && 3291 inst->dst.writemask & (1 << src_chan)) 3292 { 3293 acp[4 * r + c] = NULL; 3294 } 3295 } 3296 } 3297 } 3298 break; 3299 } 3300 3301 /* If this is a copy, add it to the ACP. */ 3302 if (inst->op == TGSI_OPCODE_MOV && 3303 inst->dst.file == PROGRAM_TEMPORARY && 3304 !inst->dst.reladdr && 3305 !inst->saturate && 3306 !inst->src[0].reladdr && 3307 !inst->src[0].negate) { 3308 for (int i = 0; i < 4; i++) { 3309 if (inst->dst.writemask & (1 << i)) { 3310 acp[4 * inst->dst.index + i] = inst; 3311 acp_level[4 * inst->dst.index + i] = level; 3312 } 3313 } 3314 } 3315 } 3316 3317 ralloc_free(acp_level); 3318 ralloc_free(acp); 3319} 3320 3321/* 3322 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. 3323 * 3324 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3325 * will occur. 
As an example, a TXP production after copy propagation but 3326 * before this pass: 3327 * 3328 * 0: MOV TEMP[1], INPUT[4].xyyy; 3329 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3330 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3331 * 3332 * and after this pass: 3333 * 3334 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3335 * 3336 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) 3337 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them 3338 */ 3339void 3340glsl_to_tgsi_visitor::eliminate_dead_code(void) 3341{ 3342 int i; 3343 3344 for (i=0; i < this->next_temp; i++) { 3345 int last_read = get_last_temp_read(i); 3346 int j = 0; 3347 3348 foreach_iter(exec_list_iterator, iter, this->instructions) { 3349 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3350 3351 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && 3352 j > last_read) 3353 { 3354 iter.remove(); 3355 delete inst; 3356 } 3357 3358 j++; 3359 } 3360 } 3361} 3362 3363/* 3364 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 3365 * code elimination. This is less primitive than eliminate_dead_code(), as it 3366 * is per-channel and can detect consecutive writes without a read between them 3367 * as dead code. However, there is some dead code that can be eliminated by 3368 * eliminate_dead_code() but not this function - for example, this function 3369 * cannot eliminate an instruction writing to a register that is never read and 3370 * is the only instruction writing to that register. 3371 * 3372 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3373 * will occur. 
3374 */ 3375int 3376glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) 3377{ 3378 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, 3379 glsl_to_tgsi_instruction *, 3380 this->next_temp * 4); 3381 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3382 int level = 0; 3383 int removed = 0; 3384 3385 foreach_iter(exec_list_iterator, iter, this->instructions) { 3386 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3387 3388 assert(inst->dst.file != PROGRAM_TEMPORARY 3389 || inst->dst.index < this->next_temp); 3390 3391 switch (inst->op) { 3392 case TGSI_OPCODE_BGNLOOP: 3393 case TGSI_OPCODE_ENDLOOP: 3394 /* End of a basic block, clear the write array entirely. 3395 * FIXME: This keeps us from killing dead code when the writes are 3396 * on either side of a loop, even when the register isn't touched 3397 * inside the loop. 3398 */ 3399 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3400 break; 3401 3402 case TGSI_OPCODE_ENDIF: 3403 --level; 3404 break; 3405 3406 case TGSI_OPCODE_ELSE: 3407 /* Clear all channels written inside the preceding if block from the 3408 * write array, but leave those that were not touched. 3409 * 3410 * FIXME: This destroys opportunities to remove dead code inside of 3411 * IF blocks that are followed by an ELSE block. 3412 */ 3413 for (int r = 0; r < this->next_temp; r++) { 3414 for (int c = 0; c < 4; c++) { 3415 if (!writes[4 * r + c]) 3416 continue; 3417 3418 if (write_level[4 * r + c] >= level) 3419 writes[4 * r + c] = NULL; 3420 } 3421 } 3422 break; 3423 3424 case TGSI_OPCODE_IF: 3425 ++level; 3426 /* fallthrough to default case to mark the condition as read */ 3427 3428 default: 3429 /* Continuing the block, clear any channels from the write array that 3430 * are read by this instruction. 
3431 */ 3432 for (int i = 0; i < 4; i++) { 3433 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ 3434 /* Any temporary might be read, so no dead code elimination 3435 * across this instruction. 3436 */ 3437 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3438 } else if (inst->src[i].file == PROGRAM_TEMPORARY) { 3439 /* Clear where it's used as src. */ 3440 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); 3441 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); 3442 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); 3443 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); 3444 3445 for (int c = 0; c < 4; c++) { 3446 if (src_chans & (1 << c)) { 3447 writes[4 * inst->src[i].index + c] = NULL; 3448 } 3449 } 3450 } 3451 } 3452 break; 3453 } 3454 3455 /* If this instruction writes to a temporary, add it to the write array. 3456 * If there is already an instruction in the write array for one or more 3457 * of the channels, flag that channel write as dead. 3458 */ 3459 if (inst->dst.file == PROGRAM_TEMPORARY && 3460 !inst->dst.reladdr && 3461 !inst->saturate) { 3462 for (int c = 0; c < 4; c++) { 3463 if (inst->dst.writemask & (1 << c)) { 3464 if (writes[4 * inst->dst.index + c]) { 3465 if (write_level[4 * inst->dst.index + c] < level) 3466 continue; 3467 else 3468 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); 3469 } 3470 writes[4 * inst->dst.index + c] = inst; 3471 write_level[4 * inst->dst.index + c] = level; 3472 } 3473 } 3474 } 3475 } 3476 3477 /* Anything still in the write array at this point is dead code. */ 3478 for (int r = 0; r < this->next_temp; r++) { 3479 for (int c = 0; c < 4; c++) { 3480 glsl_to_tgsi_instruction *inst = writes[4 * r + c]; 3481 if (inst) 3482 inst->dead_mask |= (1 << c); 3483 } 3484 } 3485 3486 /* Now actually remove the instructions that are completely dead and update 3487 * the writemask of other instructions with dead channels. 
3488 */ 3489 foreach_iter(exec_list_iterator, iter, this->instructions) { 3490 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3491 3492 if (!inst->dead_mask || !inst->dst.writemask) 3493 continue; 3494 else if (inst->dead_mask == inst->dst.writemask) { 3495 iter.remove(); 3496 delete inst; 3497 removed++; 3498 } else 3499 inst->dst.writemask &= ~(inst->dead_mask); 3500 } 3501 3502 ralloc_free(write_level); 3503 ralloc_free(writes); 3504 3505 return removed; 3506} 3507 3508/* Merges temporary registers together where possible to reduce the number of 3509 * registers needed to run a program. 3510 * 3511 * Produces optimal code only after copy propagation and dead code elimination 3512 * have been run. */ 3513void 3514glsl_to_tgsi_visitor::merge_registers(void) 3515{ 3516 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); 3517 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); 3518 int i, j; 3519 3520 /* Read the indices of the last read and first write to each temp register 3521 * into an array so that we don't have to traverse the instruction list as 3522 * much. */ 3523 for (i=0; i < this->next_temp; i++) { 3524 last_reads[i] = get_last_temp_read(i); 3525 first_writes[i] = get_first_temp_write(i); 3526 } 3527 3528 /* Start looking for registers with non-overlapping usages that can be 3529 * merged together. */ 3530 for (i=0; i < this->next_temp; i++) { 3531 /* Don't touch unused registers. */ 3532 if (last_reads[i] < 0 || first_writes[i] < 0) continue; 3533 3534 for (j=0; j < this->next_temp; j++) { 3535 /* Don't touch unused registers. */ 3536 if (last_reads[j] < 0 || first_writes[j] < 0) continue; 3537 3538 /* We can merge the two registers if the first write to j is after or 3539 * in the same instruction as the last read from i. Note that the 3540 * register at index i will always be used earlier or at the same time 3541 * as the register at index j. 
*/ 3542 if (first_writes[i] <= first_writes[j] && 3543 last_reads[i] <= first_writes[j]) 3544 { 3545 rename_temp_register(j, i); /* Replace all references to j with i.*/ 3546 3547 /* Update the first_writes and last_reads arrays with the new 3548 * values for the merged register index, and mark the newly unused 3549 * register index as such. */ 3550 last_reads[i] = last_reads[j]; 3551 first_writes[j] = -1; 3552 last_reads[j] = -1; 3553 } 3554 } 3555 } 3556 3557 ralloc_free(last_reads); 3558 ralloc_free(first_writes); 3559} 3560 3561/* Reassign indices to temporary registers by reusing unused indices created 3562 * by optimization passes. */ 3563void 3564glsl_to_tgsi_visitor::renumber_registers(void) 3565{ 3566 int i = 0; 3567 int new_index = 0; 3568 3569 for (i=0; i < this->next_temp; i++) { 3570 if (get_first_temp_read(i) < 0) continue; 3571 if (i != new_index) 3572 rename_temp_register(i, new_index); 3573 new_index++; 3574 } 3575 3576 this->next_temp = new_index; 3577} 3578 3579/** 3580 * Returns a fragment program which implements the current pixel transfer ops. 3581 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. 3582 */ 3583extern "C" void 3584get_pixel_transfer_visitor(struct st_fragment_program *fp, 3585 glsl_to_tgsi_visitor *original, 3586 int scale_and_bias, int pixel_maps) 3587{ 3588 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3589 struct st_context *st = st_context(original->ctx); 3590 struct gl_program *prog = &fp->Base.Base; 3591 struct gl_program_parameter_list *params = _mesa_new_parameter_list(); 3592 st_src_reg coord, src0; 3593 st_dst_reg dst0; 3594 glsl_to_tgsi_instruction *inst; 3595 3596 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. 
*/ 3597 v->ctx = original->ctx; 3598 v->prog = prog; 3599 v->glsl_version = original->glsl_version; 3600 v->options = original->options; 3601 v->next_temp = original->next_temp; 3602 v->num_address_regs = original->num_address_regs; 3603 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3604 v->indirect_addr_temps = original->indirect_addr_temps; 3605 v->indirect_addr_consts = original->indirect_addr_consts; 3606 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3607 3608 /* 3609 * Get initial pixel color from the texture. 3610 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; 3611 */ 3612 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3613 src0 = v->get_temp(glsl_type::vec4_type); 3614 dst0 = st_dst_reg(src0); 3615 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3616 inst->sampler = 0; 3617 inst->tex_target = TEXTURE_2D_INDEX; 3618 3619 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); 3620 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ 3621 v->samplers_used |= (1 << 0); 3622 3623 if (scale_and_bias) { 3624 static const gl_state_index scale_state[STATE_LENGTH] = 3625 { STATE_INTERNAL, STATE_PT_SCALE, 3626 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3627 static const gl_state_index bias_state[STATE_LENGTH] = 3628 { STATE_INTERNAL, STATE_PT_BIAS, 3629 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3630 GLint scale_p, bias_p; 3631 st_src_reg scale, bias; 3632 3633 scale_p = _mesa_add_state_reference(params, scale_state); 3634 bias_p = _mesa_add_state_reference(params, bias_state); 3635 3636 /* MAD colorTemp, colorTemp, scale, bias; */ 3637 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); 3638 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); 3639 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); 3640 } 3641 3642 if (pixel_maps) { 3643 st_src_reg temp = v->get_temp(glsl_type::vec4_type); 3644 st_dst_reg temp_dst = 
st_dst_reg(temp); 3645 3646 assert(st->pixel_xfer.pixelmap_texture); 3647 3648 /* With a little effort, we can do four pixel map look-ups with 3649 * two TEX instructions: 3650 */ 3651 3652 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ 3653 temp_dst.writemask = WRITEMASK_XY; /* write R,G */ 3654 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3655 inst->sampler = 1; 3656 inst->tex_target = TEXTURE_2D_INDEX; 3657 3658 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ 3659 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); 3660 temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ 3661 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3662 inst->sampler = 1; 3663 inst->tex_target = TEXTURE_2D_INDEX; 3664 3665 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ 3666 v->samplers_used |= (1 << 1); 3667 3668 /* MOV colorTemp, temp; */ 3669 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); 3670 } 3671 3672 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3673 * new visitor. */ 3674 foreach_iter(exec_list_iterator, iter, original->instructions) { 3675 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3676 st_src_reg src_regs[3]; 3677 3678 if (inst->dst.file == PROGRAM_OUTPUT) 3679 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3680 3681 for (int i=0; i<3; i++) { 3682 src_regs[i] = inst->src[i]; 3683 if (src_regs[i].file == PROGRAM_INPUT && 3684 src_regs[i].index == FRAG_ATTRIB_COL0) 3685 { 3686 src_regs[i].file = PROGRAM_TEMPORARY; 3687 src_regs[i].index = src0.index; 3688 } 3689 else if (src_regs[i].file == PROGRAM_INPUT) 3690 prog->InputsRead |= (1 << src_regs[i].index); 3691 } 3692 3693 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3694 } 3695 3696 /* Make modifications to fragment program info. 
*/ 3697 prog->Parameters = _mesa_combine_parameter_lists(params, 3698 original->prog->Parameters); 3699 prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); 3700 prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); 3701 _mesa_free_parameter_list(params); 3702 count_resources(v, prog); 3703 fp->glsl_to_tgsi = v; 3704} 3705 3706/** 3707 * Make fragment program for glBitmap: 3708 * Sample the texture and kill the fragment if the bit is 0. 3709 * This program will be combined with the user's fragment program. 3710 * 3711 * Based on make_bitmap_fragment_program in st_cb_bitmap.c. 3712 */ 3713extern "C" void 3714get_bitmap_visitor(struct st_fragment_program *fp, 3715 glsl_to_tgsi_visitor *original, int samplerIndex) 3716{ 3717 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3718 struct st_context *st = st_context(original->ctx); 3719 struct gl_program *prog = &fp->Base.Base; 3720 st_src_reg coord, src0; 3721 st_dst_reg dst0; 3722 glsl_to_tgsi_instruction *inst; 3723 3724 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. 
*/ 3725 v->ctx = original->ctx; 3726 v->prog = prog; 3727 v->glsl_version = original->glsl_version; 3728 v->options = original->options; 3729 v->next_temp = original->next_temp; 3730 v->num_address_regs = original->num_address_regs; 3731 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3732 v->indirect_addr_temps = original->indirect_addr_temps; 3733 v->indirect_addr_consts = original->indirect_addr_consts; 3734 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3735 3736 /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ 3737 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3738 src0 = v->get_temp(glsl_type::vec4_type); 3739 dst0 = st_dst_reg(src0); 3740 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3741 inst->sampler = samplerIndex; 3742 inst->tex_target = TEXTURE_2D_INDEX; 3743 3744 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); 3745 prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ 3746 v->samplers_used |= (1 << samplerIndex); 3747 3748 /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ 3749 src0.negate = NEGATE_XYZW; 3750 if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) 3751 src0.swizzle = SWIZZLE_XXXX; 3752 inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); 3753 3754 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3755 * new visitor. 
*/ 3756 foreach_iter(exec_list_iterator, iter, original->instructions) { 3757 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3758 st_src_reg src_regs[3]; 3759 3760 if (inst->dst.file == PROGRAM_OUTPUT) 3761 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3762 3763 for (int i=0; i<3; i++) { 3764 src_regs[i] = inst->src[i]; 3765 if (src_regs[i].file == PROGRAM_INPUT) 3766 prog->InputsRead |= (1 << src_regs[i].index); 3767 } 3768 3769 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3770 } 3771 3772 /* Make modifications to fragment program info. */ 3773 prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); 3774 prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); 3775 prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); 3776 count_resources(v, prog); 3777 fp->glsl_to_tgsi = v; 3778} 3779 3780/* ------------------------- TGSI conversion stuff -------------------------- */ 3781struct label { 3782 unsigned branch_target; 3783 unsigned token; 3784}; 3785 3786/** 3787 * Intermediate state used during shader translation. 
3788 */ 3789struct st_translate { 3790 struct ureg_program *ureg; 3791 3792 struct ureg_dst temps[MAX_TEMPS]; 3793 struct ureg_src *constants; 3794 struct ureg_src *immediates; 3795 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 3796 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 3797 struct ureg_dst address[1]; 3798 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 3799 struct ureg_src systemValues[SYSTEM_VALUE_MAX]; 3800 3801 /* Extra info for handling point size clamping in vertex shader */ 3802 struct ureg_dst pointSizeResult; /**< Actual point size output register */ 3803 struct ureg_src pointSizeConst; /**< Point size range constant register */ 3804 GLint pointSizeOutIndex; /**< Temp point size output register */ 3805 GLboolean prevInstWrotePointSize; 3806 3807 const GLuint *inputMapping; 3808 const GLuint *outputMapping; 3809 3810 /* For every instruction that contains a label (eg CALL), keep 3811 * details so that we can go back afterwards and emit the correct 3812 * tgsi instruction number for each label. 3813 */ 3814 struct label *labels; 3815 unsigned labels_size; 3816 unsigned labels_count; 3817 3818 /* Keep a record of the tgsi instruction number that each mesa 3819 * instruction starts at, will be used to fix up labels after 3820 * translation. 3821 */ 3822 unsigned *insn; 3823 unsigned insn_size; 3824 unsigned insn_count; 3825 3826 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 3827 3828 boolean error; 3829}; 3830 3831/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ 3832static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { 3833 TGSI_SEMANTIC_FACE, 3834 TGSI_SEMANTIC_INSTANCEID 3835}; 3836 3837/** 3838 * Make note of a branch to a label in the TGSI code. 3839 * After we've emitted all instructions, we'll go over the list 3840 * of labels built here and patch the TGSI code with the actual 3841 * location of each label. 
3842 */ 3843static unsigned *get_label(struct st_translate *t, unsigned branch_target) 3844{ 3845 unsigned i; 3846 3847 if (t->labels_count + 1 >= t->labels_size) { 3848 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 3849 t->labels = (struct label *)realloc(t->labels, 3850 t->labels_size * sizeof(struct label)); 3851 if (t->labels == NULL) { 3852 static unsigned dummy; 3853 t->error = TRUE; 3854 return &dummy; 3855 } 3856 } 3857 3858 i = t->labels_count++; 3859 t->labels[i].branch_target = branch_target; 3860 return &t->labels[i].token; 3861} 3862 3863/** 3864 * Called prior to emitting the TGSI code for each instruction. 3865 * Allocate additional space for instructions if needed. 3866 * Update the insn[] array so the next glsl_to_tgsi_instruction points to 3867 * the next TGSI instruction. 3868 */ 3869static void set_insn_start(struct st_translate *t, unsigned start) 3870{ 3871 if (t->insn_count + 1 >= t->insn_size) { 3872 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 3873 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); 3874 if (t->insn == NULL) { 3875 t->error = TRUE; 3876 return; 3877 } 3878 } 3879 3880 t->insn[t->insn_count++] = start; 3881} 3882 3883/** 3884 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 3885 */ 3886static struct ureg_src 3887emit_immediate(struct st_translate *t, 3888 gl_constant_value values[4], 3889 int type, int size) 3890{ 3891 struct ureg_program *ureg = t->ureg; 3892 3893 switch(type) 3894 { 3895 case GL_FLOAT: 3896 return ureg_DECL_immediate(ureg, &values[0].f, size); 3897 case GL_INT: 3898 return ureg_DECL_immediate_int(ureg, &values[0].i, size); 3899 case GL_UNSIGNED_INT: 3900 case GL_BOOL: 3901 return ureg_DECL_immediate_uint(ureg, &values[0].u, size); 3902 default: 3903 assert(!"should not get here - type must be float, int, uint, or bool"); 3904 return ureg_src_undef(); 3905 } 3906} 3907 3908/** 3909 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. 
3910 */ 3911static struct ureg_dst 3912dst_register(struct st_translate *t, 3913 gl_register_file file, 3914 GLuint index) 3915{ 3916 switch(file) { 3917 case PROGRAM_UNDEFINED: 3918 return ureg_dst_undef(); 3919 3920 case PROGRAM_TEMPORARY: 3921 if (ureg_dst_is_undef(t->temps[index])) 3922 t->temps[index] = ureg_DECL_temporary(t->ureg); 3923 3924 return t->temps[index]; 3925 3926 case PROGRAM_OUTPUT: 3927 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) 3928 t->prevInstWrotePointSize = GL_TRUE; 3929 3930 if (t->procType == TGSI_PROCESSOR_VERTEX) 3931 assert(index < VERT_RESULT_MAX); 3932 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 3933 assert(index < FRAG_RESULT_MAX); 3934 else 3935 assert(index < GEOM_RESULT_MAX); 3936 3937 assert(t->outputMapping[index] < Elements(t->outputs)); 3938 3939 return t->outputs[t->outputMapping[index]]; 3940 3941 case PROGRAM_ADDRESS: 3942 return t->address[index]; 3943 3944 default: 3945 assert(!"unknown dst register file"); 3946 return ureg_dst_undef(); 3947 } 3948} 3949 3950/** 3951 * Map a glsl_to_tgsi src register to a TGSI ureg_src register. 
3952 */ 3953static struct ureg_src 3954src_register(struct st_translate *t, 3955 gl_register_file file, 3956 GLuint index) 3957{ 3958 switch(file) { 3959 case PROGRAM_UNDEFINED: 3960 return ureg_src_undef(); 3961 3962 case PROGRAM_TEMPORARY: 3963 assert(index >= 0); 3964 assert(index < Elements(t->temps)); 3965 if (ureg_dst_is_undef(t->temps[index])) 3966 t->temps[index] = ureg_DECL_temporary(t->ureg); 3967 return ureg_src(t->temps[index]); 3968 3969 case PROGRAM_NAMED_PARAM: 3970 case PROGRAM_ENV_PARAM: 3971 case PROGRAM_LOCAL_PARAM: 3972 case PROGRAM_UNIFORM: 3973 assert(index >= 0); 3974 return t->constants[index]; 3975 case PROGRAM_STATE_VAR: 3976 case PROGRAM_CONSTANT: /* ie, immediate */ 3977 if (index < 0) 3978 return ureg_DECL_constant(t->ureg, 0); 3979 else 3980 return t->constants[index]; 3981 3982 case PROGRAM_IMMEDIATE: 3983 return t->immediates[index]; 3984 3985 case PROGRAM_INPUT: 3986 assert(t->inputMapping[index] < Elements(t->inputs)); 3987 return t->inputs[t->inputMapping[index]]; 3988 3989 case PROGRAM_OUTPUT: 3990 assert(t->outputMapping[index] < Elements(t->outputs)); 3991 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 3992 3993 case PROGRAM_ADDRESS: 3994 return ureg_src(t->address[index]); 3995 3996 case PROGRAM_SYSTEM_VALUE: 3997 assert(index < Elements(t->systemValues)); 3998 return t->systemValues[index]; 3999 4000 default: 4001 assert(!"unknown src register file"); 4002 return ureg_src_undef(); 4003 } 4004} 4005 4006/** 4007 * Create a TGSI ureg_dst register from an st_dst_reg. 
4008 */ 4009static struct ureg_dst 4010translate_dst(struct st_translate *t, 4011 const st_dst_reg *dst_reg, 4012 bool saturate) 4013{ 4014 struct ureg_dst dst = dst_register(t, 4015 dst_reg->file, 4016 dst_reg->index); 4017 4018 dst = ureg_writemask(dst, dst_reg->writemask); 4019 4020 if (saturate) 4021 dst = ureg_saturate(dst); 4022 4023 if (dst_reg->reladdr != NULL) 4024 dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); 4025 4026 return dst; 4027} 4028 4029/** 4030 * Create a TGSI ureg_src register from an st_src_reg. 4031 */ 4032static struct ureg_src 4033translate_src(struct st_translate *t, const st_src_reg *src_reg) 4034{ 4035 struct ureg_src src = src_register(t, src_reg->file, src_reg->index); 4036 4037 src = ureg_swizzle(src, 4038 GET_SWZ(src_reg->swizzle, 0) & 0x3, 4039 GET_SWZ(src_reg->swizzle, 1) & 0x3, 4040 GET_SWZ(src_reg->swizzle, 2) & 0x3, 4041 GET_SWZ(src_reg->swizzle, 3) & 0x3); 4042 4043 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 4044 src = ureg_negate(src); 4045 4046 if (src_reg->reladdr != NULL) { 4047 /* Normally ureg_src_indirect() would be used here, but a stupid compiler 4048 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 4049 * set the bit for src.Negate. So we have to do the operation manually 4050 * here to work around the compiler's problems. */ 4051 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ 4052 struct ureg_src addr = ureg_src(t->address[0]); 4053 src.Indirect = 1; 4054 src.IndirectFile = addr.File; 4055 src.IndirectIndex = addr.Index; 4056 src.IndirectSwizzle = addr.SwizzleX; 4057 4058 if (src_reg->file != PROGRAM_INPUT && 4059 src_reg->file != PROGRAM_OUTPUT) { 4060 /* If src_reg->index was negative, it was set to zero in 4061 * src_register(). Reassign it now. But don't do this 4062 * for input/output regs since they get remapped while 4063 * const buffers don't. 
4064 */ 4065 src.Index = src_reg->index; 4066 } 4067 } 4068 4069 return src; 4070} 4071 4072static void 4073compile_tgsi_instruction(struct st_translate *t, 4074 const struct glsl_to_tgsi_instruction *inst) 4075{ 4076 struct ureg_program *ureg = t->ureg; 4077 GLuint i; 4078 struct ureg_dst dst[1]; 4079 struct ureg_src src[4]; 4080 unsigned num_dst; 4081 unsigned num_src; 4082 4083 num_dst = num_inst_dst_regs(inst->op); 4084 num_src = num_inst_src_regs(inst->op); 4085 4086 if (num_dst) 4087 dst[0] = translate_dst(t, 4088 &inst->dst, 4089 inst->saturate); 4090 4091 for (i = 0; i < num_src; i++) 4092 src[i] = translate_src(t, &inst->src[i]); 4093 4094 switch(inst->op) { 4095 case TGSI_OPCODE_BGNLOOP: 4096 case TGSI_OPCODE_CAL: 4097 case TGSI_OPCODE_ELSE: 4098 case TGSI_OPCODE_ENDLOOP: 4099 case TGSI_OPCODE_IF: 4100 assert(num_dst == 0); 4101 ureg_label_insn(ureg, 4102 inst->op, 4103 src, num_src, 4104 get_label(t, 4105 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); 4106 return; 4107 4108 case TGSI_OPCODE_TEX: 4109 case TGSI_OPCODE_TXB: 4110 case TGSI_OPCODE_TXD: 4111 case TGSI_OPCODE_TXL: 4112 case TGSI_OPCODE_TXP: 4113 src[num_src++] = t->samplers[inst->sampler]; 4114 ureg_tex_insn(ureg, 4115 inst->op, 4116 dst, num_dst, 4117 translate_texture_target(inst->tex_target, inst->tex_shadow), 4118 src, num_src); 4119 return; 4120 4121 case TGSI_OPCODE_SCS: 4122 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); 4123 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); 4124 break; 4125 4126 default: 4127 ureg_insn(ureg, 4128 inst->op, 4129 dst, num_dst, 4130 src, num_src); 4131 break; 4132 } 4133} 4134 4135/** 4136 * Emit the TGSI instructions to adjust the WPOS pixel center convention 4137 * Basically, add (adjX, adjY) to the fragment position. 
4138 */ 4139static void 4140emit_adjusted_wpos(struct st_translate *t, 4141 const struct gl_program *program, 4142 float adjX, float adjY) 4143{ 4144 struct ureg_program *ureg = t->ureg; 4145 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 4146 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 4147 4148 /* Note that we bias X and Y and pass Z and W through unchanged. 4149 * The shader might also use gl_FragCoord.w and .z. 4150 */ 4151 ureg_ADD(ureg, wpos_temp, wpos_input, 4152 ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); 4153 4154 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 4155} 4156 4157 4158/** 4159 * Emit the TGSI instructions for inverting the WPOS y coordinate. 4160 * This code is unavoidable because it also depends on whether 4161 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 4162 */ 4163static void 4164emit_wpos_inversion(struct st_translate *t, 4165 const struct gl_program *program, 4166 bool invert) 4167{ 4168 struct ureg_program *ureg = t->ureg; 4169 4170 /* Fragment program uses fragment position input. 4171 * Need to replace instances of INPUT[WPOS] with temp T 4172 * where T = INPUT[WPOS] by y is inverted. 4173 */ 4174 static const gl_state_index wposTransformState[STATE_LENGTH] 4175 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 4176 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4177 4178 /* XXX: note we are modifying the incoming shader here! 
Need to 4179 * do this before emitting the constant decls below, or this 4180 * will be missed: 4181 */ 4182 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 4183 wposTransformState); 4184 4185 struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); 4186 struct ureg_dst wpos_temp; 4187 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 4188 4189 /* MOV wpos_temp, input[wpos] 4190 */ 4191 if (wpos_input.File == TGSI_FILE_TEMPORARY) 4192 wpos_temp = ureg_dst(wpos_input); 4193 else { 4194 wpos_temp = ureg_DECL_temporary(ureg); 4195 ureg_MOV(ureg, wpos_temp, wpos_input); 4196 } 4197 4198 if (invert) { 4199 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 4200 */ 4201 ureg_MAD(ureg, 4202 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), 4203 wpos_input, 4204 ureg_scalar(wpostrans, 0), 4205 ureg_scalar(wpostrans, 1)); 4206 } else { 4207 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 4208 */ 4209 ureg_MAD(ureg, 4210 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), 4211 wpos_input, 4212 ureg_scalar(wpostrans, 2), 4213 ureg_scalar(wpostrans, 3)); 4214 } 4215 4216 /* Use wpos_temp as position input from here on: 4217 */ 4218 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 4219} 4220 4221 4222/** 4223 * Emit fragment position/ooordinate code. 
4224 */ 4225static void 4226emit_wpos(struct st_context *st, 4227 struct st_translate *t, 4228 const struct gl_program *program, 4229 struct ureg_program *ureg) 4230{ 4231 const struct gl_fragment_program *fp = 4232 (const struct gl_fragment_program *) program; 4233 struct pipe_screen *pscreen = st->pipe->screen; 4234 boolean invert = FALSE; 4235 4236 if (fp->OriginUpperLeft) { 4237 /* Fragment shader wants origin in upper-left */ 4238 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 4239 /* the driver supports upper-left origin */ 4240 } 4241 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 4242 /* the driver supports lower-left origin, need to invert Y */ 4243 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4244 invert = TRUE; 4245 } 4246 else 4247 assert(0); 4248 } 4249 else { 4250 /* Fragment shader wants origin in lower-left */ 4251 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 4252 /* the driver supports lower-left origin */ 4253 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4254 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 4255 /* the driver supports upper-left origin, need to invert Y */ 4256 invert = TRUE; 4257 else 4258 assert(0); 4259 } 4260 4261 if (fp->PixelCenterInteger) { 4262 /* Fragment shader wants pixel center integer */ 4263 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) 4264 /* the driver supports pixel center integer */ 4265 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4266 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) 4267 /* the driver supports pixel center half integer, need to bias X,Y */ 4268 emit_adjusted_wpos(t, program, 0.5f, invert ? 
0.5f : -0.5f); 4269 else 4270 assert(0); 4271 } 4272 else { 4273 /* Fragment shader wants pixel center half integer */ 4274 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4275 /* the driver supports pixel center half integer */ 4276 } 4277 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4278 /* the driver supports pixel center integer, need to bias X,Y */ 4279 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4280 emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); 4281 } 4282 else 4283 assert(0); 4284 } 4285 4286 /* we invert after adjustment so that we avoid the MOV to temporary, 4287 * and reuse the adjustment ADD instead */ 4288 emit_wpos_inversion(t, program, invert); 4289} 4290 4291/** 4292 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 4293 * TGSI uses +1 for front, -1 for back. 4294 * This function converts the TGSI value to the GL value. Simply clamping/ 4295 * saturating the value to [0,1] does the job. 
4296 */ 4297static void 4298emit_face_var(struct st_translate *t) 4299{ 4300 struct ureg_program *ureg = t->ureg; 4301 struct ureg_dst face_temp = ureg_DECL_temporary(ureg); 4302 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 4303 4304 /* MOV_SAT face_temp, input[face] */ 4305 face_temp = ureg_saturate(face_temp); 4306 ureg_MOV(ureg, face_temp, face_input); 4307 4308 /* Use face_temp as face input from here on: */ 4309 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 4310} 4311 4312static void 4313emit_edgeflags(struct st_translate *t) 4314{ 4315 struct ureg_program *ureg = t->ureg; 4316 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 4317 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 4318 4319 ureg_MOV(ureg, edge_dst, edge_src); 4320} 4321 4322/** 4323 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 4324 * \param program the program to translate 4325 * \param numInputs number of input registers used 4326 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 4327 * input indexes 4328 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 4329 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 4330 * each input 4331 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 4332 * \param numOutputs number of output registers used 4333 * \param outputMapping maps Mesa fragment program outputs to TGSI 4334 * generic outputs 4335 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 4336 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 4337 * each output 4338 * 4339 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 4340 */ 4341extern "C" enum pipe_error 4342st_translate_program( 4343 struct gl_context *ctx, 4344 uint procType, 4345 struct ureg_program *ureg, 4346 glsl_to_tgsi_visitor *program, 4347 const struct gl_program *proginfo, 4348 GLuint 
numInputs, 4349 const GLuint inputMapping[], 4350 const ubyte inputSemanticName[], 4351 const ubyte inputSemanticIndex[], 4352 const GLuint interpMode[], 4353 GLuint numOutputs, 4354 const GLuint outputMapping[], 4355 const ubyte outputSemanticName[], 4356 const ubyte outputSemanticIndex[], 4357 boolean passthrough_edgeflags) 4358{ 4359 struct st_translate translate, *t; 4360 unsigned i; 4361 enum pipe_error ret = PIPE_OK; 4362 4363 assert(numInputs <= Elements(t->inputs)); 4364 assert(numOutputs <= Elements(t->outputs)); 4365 4366 t = &translate; 4367 memset(t, 0, sizeof *t); 4368 4369 t->procType = procType; 4370 t->inputMapping = inputMapping; 4371 t->outputMapping = outputMapping; 4372 t->ureg = ureg; 4373 t->pointSizeOutIndex = -1; 4374 t->prevInstWrotePointSize = GL_FALSE; 4375 4376 /* 4377 * Declare input attributes. 4378 */ 4379 if (procType == TGSI_PROCESSOR_FRAGMENT) { 4380 for (i = 0; i < numInputs; i++) { 4381 t->inputs[i] = ureg_DECL_fs_input(ureg, 4382 inputSemanticName[i], 4383 inputSemanticIndex[i], 4384 interpMode[i]); 4385 } 4386 4387 if (proginfo->InputsRead & FRAG_BIT_WPOS) { 4388 /* Must do this after setting up t->inputs, and before 4389 * emitting constant references, below: 4390 */ 4391 emit_wpos(st_context(ctx), t, proginfo, ureg); 4392 } 4393 4394 if (proginfo->InputsRead & FRAG_BIT_FACE) 4395 emit_face_var(t); 4396 4397 /* 4398 * Declare output attributes. 
4399 */ 4400 for (i = 0; i < numOutputs; i++) { 4401 switch (outputSemanticName[i]) { 4402 case TGSI_SEMANTIC_POSITION: 4403 t->outputs[i] = ureg_DECL_output(ureg, 4404 TGSI_SEMANTIC_POSITION, /* Z/Depth */ 4405 outputSemanticIndex[i]); 4406 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); 4407 break; 4408 case TGSI_SEMANTIC_STENCIL: 4409 t->outputs[i] = ureg_DECL_output(ureg, 4410 TGSI_SEMANTIC_STENCIL, /* Stencil */ 4411 outputSemanticIndex[i]); 4412 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); 4413 break; 4414 case TGSI_SEMANTIC_COLOR: 4415 t->outputs[i] = ureg_DECL_output(ureg, 4416 TGSI_SEMANTIC_COLOR, 4417 outputSemanticIndex[i]); 4418 break; 4419 default: 4420 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); 4421 return PIPE_ERROR_BAD_INPUT; 4422 } 4423 } 4424 } 4425 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 4426 for (i = 0; i < numInputs; i++) { 4427 t->inputs[i] = ureg_DECL_gs_input(ureg, 4428 i, 4429 inputSemanticName[i], 4430 inputSemanticIndex[i]); 4431 } 4432 4433 for (i = 0; i < numOutputs; i++) { 4434 t->outputs[i] = ureg_DECL_output(ureg, 4435 outputSemanticName[i], 4436 outputSemanticIndex[i]); 4437 } 4438 } 4439 else { 4440 assert(procType == TGSI_PROCESSOR_VERTEX); 4441 4442 for (i = 0; i < numInputs; i++) { 4443 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 4444 } 4445 4446 for (i = 0; i < numOutputs; i++) { 4447 t->outputs[i] = ureg_DECL_output(ureg, 4448 outputSemanticName[i], 4449 outputSemanticIndex[i]); 4450 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { 4451 /* Writing to the point size result register requires special 4452 * handling to implement clamping. 4453 */ 4454 static const gl_state_index pointSizeClampState[STATE_LENGTH] 4455 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4456 /* XXX: note we are modifying the incoming shader here! 
Need to 4457 * do this before emitting the constant decls below, or this 4458 * will be missed. 4459 */ 4460 unsigned pointSizeClampConst = 4461 _mesa_add_state_reference(proginfo->Parameters, 4462 pointSizeClampState); 4463 struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); 4464 t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); 4465 t->pointSizeResult = t->outputs[i]; 4466 t->pointSizeOutIndex = i; 4467 t->outputs[i] = psizregtemp; 4468 } 4469 } 4470 if (passthrough_edgeflags) 4471 emit_edgeflags(t); 4472 } 4473 4474 /* Declare address register. 4475 */ 4476 if (program->num_address_regs > 0) { 4477 assert(program->num_address_regs == 1); 4478 t->address[0] = ureg_DECL_address(ureg); 4479 } 4480 4481 /* Declare misc input registers 4482 */ 4483 { 4484 GLbitfield sysInputs = proginfo->SystemValuesRead; 4485 unsigned numSys = 0; 4486 for (i = 0; sysInputs; i++) { 4487 if (sysInputs & (1 << i)) { 4488 unsigned semName = mesa_sysval_to_semantic[i]; 4489 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); 4490 numSys++; 4491 sysInputs &= ~(1 << i); 4492 } 4493 } 4494 } 4495 4496 if (program->indirect_addr_temps) { 4497 /* If temps are accessed with indirect addressing, declare temporaries 4498 * in sequential order. Else, we declare them on demand elsewhere. 4499 * (Note: the number of temporaries is equal to program->next_temp) 4500 */ 4501 for (i = 0; i < (unsigned)program->next_temp; i++) { 4502 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 4503 t->temps[i] = ureg_DECL_temporary(t->ureg); 4504 } 4505 } 4506 4507 /* Emit constants and uniforms. TGSI uses a single index space for these, 4508 * so we put all the translated regs in t->constants. 
4509 */ 4510 if (proginfo->Parameters) { 4511 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); 4512 if (t->constants == NULL) { 4513 ret = PIPE_ERROR_OUT_OF_MEMORY; 4514 goto out; 4515 } 4516 4517 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 4518 switch (proginfo->Parameters->Parameters[i].Type) { 4519 case PROGRAM_ENV_PARAM: 4520 case PROGRAM_LOCAL_PARAM: 4521 case PROGRAM_STATE_VAR: 4522 case PROGRAM_NAMED_PARAM: 4523 case PROGRAM_UNIFORM: 4524 t->constants[i] = ureg_DECL_constant(ureg, i); 4525 break; 4526 4527 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect 4528 * addressing of the const buffer. 4529 * FIXME: Be smarter and recognize param arrays: 4530 * indirect addressing is only valid within the referenced 4531 * array. 4532 */ 4533 case PROGRAM_CONSTANT: 4534 if (program->indirect_addr_consts) 4535 t->constants[i] = ureg_DECL_constant(ureg, i); 4536 else 4537 t->constants[i] = emit_immediate(t, 4538 proginfo->Parameters->ParameterValues[i], 4539 proginfo->Parameters->Parameters[i].DataType, 4540 4); 4541 break; 4542 default: 4543 break; 4544 } 4545 } 4546 } 4547 4548 /* Emit immediate values. 
4549 */ 4550 t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); 4551 if (t->immediates == NULL) { 4552 ret = PIPE_ERROR_OUT_OF_MEMORY; 4553 goto out; 4554 } 4555 i = 0; 4556 foreach_iter(exec_list_iterator, iter, program->immediates) { 4557 immediate_storage *imm = (immediate_storage *)iter.get(); 4558 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); 4559 } 4560 4561 /* texture samplers */ 4562 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 4563 if (program->samplers_used & (1 << i)) { 4564 t->samplers[i] = ureg_DECL_sampler(ureg, i); 4565 } 4566 } 4567 4568 /* Emit each instruction in turn: 4569 */ 4570 foreach_iter(exec_list_iterator, iter, program->instructions) { 4571 set_insn_start(t, ureg_get_instruction_number(ureg)); 4572 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); 4573 4574 if (t->prevInstWrotePointSize && proginfo->Id) { 4575 /* The previous instruction wrote to the (fake) vertex point size 4576 * result register. Now we need to clamp that value to the min/max 4577 * point size range, putting the result into the real point size 4578 * register. 4579 * Note that we can't do this easily at the end of program due to 4580 * possible early return. 
4581 */ 4582 set_insn_start(t, ureg_get_instruction_number(ureg)); 4583 ureg_MAX(t->ureg, 4584 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), 4585 ureg_src(t->outputs[t->pointSizeOutIndex]), 4586 ureg_swizzle(t->pointSizeConst, 1,1,1,1)); 4587 ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), 4588 ureg_src(t->outputs[t->pointSizeOutIndex]), 4589 ureg_swizzle(t->pointSizeConst, 2,2,2,2)); 4590 } 4591 t->prevInstWrotePointSize = GL_FALSE; 4592 } 4593 4594 /* Fix up all emitted labels: 4595 */ 4596 for (i = 0; i < t->labels_count; i++) { 4597 ureg_fixup_label(ureg, t->labels[i].token, 4598 t->insn[t->labels[i].branch_target]); 4599 } 4600 4601out: 4602 FREE(t->insn); 4603 FREE(t->labels); 4604 FREE(t->constants); 4605 FREE(t->immediates); 4606 4607 if (t->error) { 4608 debug_printf("%s: translate error flag set\n", __FUNCTION__); 4609 } 4610 4611 return ret; 4612} 4613/* ----------------------------- End TGSI code ------------------------------ */ 4614 4615/** 4616 * Convert a shader's GLSL IR into a Mesa gl_program, although without 4617 * generating Mesa IR. 
4618 */ 4619static struct gl_program * 4620get_mesa_program(struct gl_context *ctx, 4621 struct gl_shader_program *shader_program, 4622 struct gl_shader *shader) 4623{ 4624 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); 4625 struct gl_program *prog; 4626 GLenum target; 4627 const char *target_string; 4628 bool progress; 4629 struct gl_shader_compiler_options *options = 4630 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; 4631 4632 switch (shader->Type) { 4633 case GL_VERTEX_SHADER: 4634 target = GL_VERTEX_PROGRAM_ARB; 4635 target_string = "vertex"; 4636 break; 4637 case GL_FRAGMENT_SHADER: 4638 target = GL_FRAGMENT_PROGRAM_ARB; 4639 target_string = "fragment"; 4640 break; 4641 case GL_GEOMETRY_SHADER: 4642 target = GL_GEOMETRY_PROGRAM_NV; 4643 target_string = "geometry"; 4644 break; 4645 default: 4646 assert(!"should not be reached"); 4647 return NULL; 4648 } 4649 4650 validate_ir_tree(shader->ir); 4651 4652 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); 4653 if (!prog) 4654 return NULL; 4655 prog->Parameters = _mesa_new_parameter_list(); 4656 prog->Varying = _mesa_new_parameter_list(); 4657 prog->Attributes = _mesa_new_parameter_list(); 4658 v->ctx = ctx; 4659 v->prog = prog; 4660 v->shader_program = shader_program; 4661 v->options = options; 4662 v->glsl_version = ctx->Const.GLSLVersion; 4663 4664 add_uniforms_to_parameters_list(shader_program, shader, prog); 4665 4666 /* Emit intermediate IR for main(). */ 4667 visit_exec_list(shader->ir, v); 4668 4669 /* Now emit bodies for any functions that were used. 
*/ 4670 do { 4671 progress = GL_FALSE; 4672 4673 foreach_iter(exec_list_iterator, iter, v->function_signatures) { 4674 function_entry *entry = (function_entry *)iter.get(); 4675 4676 if (!entry->bgn_inst) { 4677 v->current_function = entry; 4678 4679 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); 4680 entry->bgn_inst->function = entry; 4681 4682 visit_exec_list(&entry->sig->body, v); 4683 4684 glsl_to_tgsi_instruction *last; 4685 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); 4686 if (last->op != TGSI_OPCODE_RET) 4687 v->emit(NULL, TGSI_OPCODE_RET); 4688 4689 glsl_to_tgsi_instruction *end; 4690 end = v->emit(NULL, TGSI_OPCODE_ENDSUB); 4691 end->function = entry; 4692 4693 progress = GL_TRUE; 4694 } 4695 } 4696 } while (progress); 4697 4698#if 0 4699 /* Print out some information (for debugging purposes) used by the 4700 * optimization passes. */ 4701 for (i=0; i < v->next_temp; i++) { 4702 int fr = v->get_first_temp_read(i); 4703 int fw = v->get_first_temp_write(i); 4704 int lr = v->get_last_temp_read(i); 4705 int lw = v->get_last_temp_write(i); 4706 4707 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); 4708 assert(fw <= fr); 4709 } 4710#endif 4711 4712 /* Remove reads to output registers, and to varyings in vertex shaders. */ 4713 v->remove_output_reads(PROGRAM_OUTPUT); 4714 if (target == GL_VERTEX_PROGRAM_ARB) 4715 v->remove_output_reads(PROGRAM_VARYING); 4716 4717 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ 4718 v->simplify_cmp(); 4719 v->copy_propagate(); 4720 while (v->eliminate_dead_code_advanced()); 4721 4722 /* FIXME: These passes to optimize temporary registers don't work when there 4723 * is indirect addressing of the temporary register space. We need proper 4724 * array support so that we don't have to give up these passes in every 4725 * shader that uses arrays. 
4726 */ 4727 if (!v->indirect_addr_temps) { 4728 v->eliminate_dead_code(); 4729 v->merge_registers(); 4730 v->renumber_registers(); 4731 } 4732 4733 /* Write the END instruction. */ 4734 v->emit(NULL, TGSI_OPCODE_END); 4735 4736 if (ctx->Shader.Flags & GLSL_DUMP) { 4737 printf("\n"); 4738 printf("GLSL IR for linked %s program %d:\n", target_string, 4739 shader_program->Name); 4740 _mesa_print_ir(shader->ir, NULL); 4741 printf("\n"); 4742 printf("\n"); 4743 } 4744 4745 prog->Instructions = NULL; 4746 prog->NumInstructions = 0; 4747 4748 do_set_program_inouts(shader->ir, prog); 4749 count_resources(v, prog); 4750 4751 check_resources(ctx, shader_program, v, prog); 4752 4753 _mesa_reference_program(ctx, &shader->Program, prog); 4754 4755 struct st_vertex_program *stvp; 4756 struct st_fragment_program *stfp; 4757 struct st_geometry_program *stgp; 4758 4759 switch (shader->Type) { 4760 case GL_VERTEX_SHADER: 4761 stvp = (struct st_vertex_program *)prog; 4762 stvp->glsl_to_tgsi = v; 4763 break; 4764 case GL_FRAGMENT_SHADER: 4765 stfp = (struct st_fragment_program *)prog; 4766 stfp->glsl_to_tgsi = v; 4767 break; 4768 case GL_GEOMETRY_SHADER: 4769 stgp = (struct st_geometry_program *)prog; 4770 stgp->glsl_to_tgsi = v; 4771 break; 4772 default: 4773 assert(!"should not be reached"); 4774 return NULL; 4775 } 4776 4777 return prog; 4778} 4779 4780extern "C" { 4781 4782struct gl_shader * 4783st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 4784{ 4785 struct gl_shader *shader; 4786 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || 4787 type == GL_GEOMETRY_SHADER_ARB); 4788 shader = rzalloc(NULL, struct gl_shader); 4789 if (shader) { 4790 shader->Type = type; 4791 shader->Name = name; 4792 _mesa_init_shader(ctx, shader); 4793 } 4794 return shader; 4795} 4796 4797struct gl_shader_program * 4798st_new_shader_program(struct gl_context *ctx, GLuint name) 4799{ 4800 struct gl_shader_program *shProg; 4801 shProg = rzalloc(NULL, struct gl_shader_program); 
4802 if (shProg) { 4803 shProg->Name = name; 4804 _mesa_init_shader_program(ctx, shProg); 4805 } 4806 return shProg; 4807} 4808 4809/** 4810 * Link a shader. 4811 * Called via ctx->Driver.LinkShader() 4812 * This actually involves converting GLSL IR into an intermediate TGSI-like IR 4813 * with code lowering and other optimizations. 4814 */ 4815GLboolean 4816st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 4817{ 4818 assert(prog->LinkStatus); 4819 4820 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4821 if (prog->_LinkedShaders[i] == NULL) 4822 continue; 4823 4824 bool progress; 4825 exec_list *ir = prog->_LinkedShaders[i]->ir; 4826 const struct gl_shader_compiler_options *options = 4827 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; 4828 4829 do { 4830 progress = false; 4831 4832 /* Lowering */ 4833 do_mat_op_to_vec(ir); 4834 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 4835 | LOG_TO_LOG2 4836 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); 4837 4838 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; 4839 4840 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; 4841 4842 progress = lower_quadop_vector(ir, false) || progress; 4843 4844 if (options->EmitNoIfs) { 4845 progress = lower_discard(ir) || progress; 4846 progress = lower_if_to_cond_assign(ir) || progress; 4847 } 4848 4849 if (options->EmitNoNoise) 4850 progress = lower_noise(ir) || progress; 4851 4852 /* If there are forms of indirect addressing that the driver 4853 * cannot handle, perform the lowering pass. 
4854 */ 4855 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 4856 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 4857 progress = 4858 lower_variable_index_to_cond_assign(ir, 4859 options->EmitNoIndirectInput, 4860 options->EmitNoIndirectOutput, 4861 options->EmitNoIndirectTemp, 4862 options->EmitNoIndirectUniform) 4863 || progress; 4864 4865 progress = do_vec_index_to_cond_assign(ir) || progress; 4866 } while (progress); 4867 4868 validate_ir_tree(ir); 4869 } 4870 4871 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4872 struct gl_program *linked_prog; 4873 4874 if (prog->_LinkedShaders[i] == NULL) 4875 continue; 4876 4877 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); 4878 4879 if (linked_prog) { 4880 bool ok = true; 4881 4882 switch (prog->_LinkedShaders[i]->Type) { 4883 case GL_VERTEX_SHADER: 4884 _mesa_reference_vertprog(ctx, &prog->VertexProgram, 4885 (struct gl_vertex_program *)linked_prog); 4886 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, 4887 linked_prog); 4888 break; 4889 case GL_FRAGMENT_SHADER: 4890 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, 4891 (struct gl_fragment_program *)linked_prog); 4892 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, 4893 linked_prog); 4894 break; 4895 case GL_GEOMETRY_SHADER: 4896 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, 4897 (struct gl_geometry_program *)linked_prog); 4898 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, 4899 linked_prog); 4900 break; 4901 } 4902 if (!ok) { 4903 return GL_FALSE; 4904 } 4905 } 4906 4907 _mesa_reference_program(ctx, &linked_prog, NULL); 4908 } 4909 4910 return GL_TRUE; 4911} 4912 4913 4914/** 4915 * Link a GLSL shader program. Called via glLinkProgram(). 
4916 */ 4917void 4918st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 4919{ 4920 unsigned int i; 4921 4922 _mesa_clear_shader_program_data(ctx, prog); 4923 4924 prog->LinkStatus = GL_TRUE; 4925 4926 for (i = 0; i < prog->NumShaders; i++) { 4927 if (!prog->Shaders[i]->CompileStatus) { 4928 fail_link(prog, "linking with uncompiled shader"); 4929 prog->LinkStatus = GL_FALSE; 4930 } 4931 } 4932 4933 prog->Varying = _mesa_new_parameter_list(); 4934 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); 4935 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); 4936 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); 4937 4938 if (prog->LinkStatus) { 4939 link_shaders(ctx, prog); 4940 } 4941 4942 if (prog->LinkStatus) { 4943 if (!ctx->Driver.LinkShader(ctx, prog)) { 4944 prog->LinkStatus = GL_FALSE; 4945 } 4946 } 4947 4948 set_uniform_initializers(ctx, prog); 4949 4950 if (ctx->Shader.Flags & GLSL_DUMP) { 4951 if (!prog->LinkStatus) { 4952 printf("GLSL shader program %d failed to link\n", prog->Name); 4953 } 4954 4955 if (prog->InfoLog && prog->InfoLog[0] != 0) { 4956 printf("GLSL shader program %d info log:\n", prog->Name); 4957 printf("%s\n", prog->InfoLog); 4958 } 4959 } 4960} 4961 4962} /* extern "C" */ 4963