st_glsl_to_tgsi.cpp revision 49468a1b2a241d5a6a1155f79b48fa6562524206
/*
 * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
 * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
 * Copyright © 2010 Intel Corporation
 * Copyright © 2011 Bryan Cain
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file glsl_to_tgsi.cpp
 *
 * Translate GLSL IR to TGSI.
31 */ 32 33#include <stdio.h> 34#include "main/compiler.h" 35#include "ir.h" 36#include "ir_visitor.h" 37#include "ir_print_visitor.h" 38#include "ir_expression_flattening.h" 39#include "glsl_types.h" 40#include "glsl_parser_extras.h" 41#include "../glsl/program.h" 42#include "ir_optimization.h" 43#include "ast.h" 44 45#include "main/mtypes.h" 46#include "main/shaderobj.h" 47#include "program/hash_table.h" 48 49extern "C" { 50#include "main/shaderapi.h" 51#include "main/uniforms.h" 52#include "program/prog_instruction.h" 53#include "program/prog_optimize.h" 54#include "program/prog_print.h" 55#include "program/program.h" 56#include "program/prog_parameter.h" 57#include "program/sampler.h" 58 59#include "pipe/p_compiler.h" 60#include "pipe/p_context.h" 61#include "pipe/p_screen.h" 62#include "pipe/p_shader_tokens.h" 63#include "pipe/p_state.h" 64#include "util/u_math.h" 65#include "tgsi/tgsi_ureg.h" 66#include "tgsi/tgsi_info.h" 67#include "st_context.h" 68#include "st_program.h" 69#include "st_glsl_to_tgsi.h" 70#include "st_mesa_to_tgsi.h" 71} 72 73#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX 74#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ 75 (1 << PROGRAM_ENV_PARAM) | \ 76 (1 << PROGRAM_STATE_VAR) | \ 77 (1 << PROGRAM_NAMED_PARAM) | \ 78 (1 << PROGRAM_CONSTANT) | \ 79 (1 << PROGRAM_UNIFORM)) 80 81/** 82 * Maximum number of temporary registers. 83 * 84 * It is too big for stack allocated arrays -- it will cause stack overflow on 85 * Windows and likely Mac OS X. 86 */ 87#define MAX_TEMPS 4096 88 89/* will be 4 for GLSL 4.00 */ 90#define MAX_GLSL_TEXTURE_OFFSET 1 91 92class st_src_reg; 93class st_dst_reg; 94 95static int swizzle_for_size(int size); 96 97/** 98 * This struct is a corresponding struct to TGSI ureg_src. 
99 */ 100class st_src_reg { 101public: 102 st_src_reg(gl_register_file file, int index, const glsl_type *type) 103 { 104 this->file = file; 105 this->index = index; 106 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 107 this->swizzle = swizzle_for_size(type->vector_elements); 108 else 109 this->swizzle = SWIZZLE_XYZW; 110 this->negate = 0; 111 this->type = type ? type->base_type : GLSL_TYPE_ERROR; 112 this->reladdr = NULL; 113 } 114 115 st_src_reg(gl_register_file file, int index, int type) 116 { 117 this->type = type; 118 this->file = file; 119 this->index = index; 120 this->swizzle = SWIZZLE_XYZW; 121 this->negate = 0; 122 this->reladdr = NULL; 123 } 124 125 st_src_reg() 126 { 127 this->type = GLSL_TYPE_ERROR; 128 this->file = PROGRAM_UNDEFINED; 129 this->index = 0; 130 this->swizzle = 0; 131 this->negate = 0; 132 this->reladdr = NULL; 133 } 134 135 explicit st_src_reg(st_dst_reg reg); 136 137 gl_register_file file; /**< PROGRAM_* from Mesa */ 138 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 139 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 140 int negate; /**< NEGATE_XYZW mask from mesa */ 141 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 142 /** Register index should be offset by the integer in this reg. 
*/ 143 st_src_reg *reladdr; 144}; 145 146class st_dst_reg { 147public: 148 st_dst_reg(gl_register_file file, int writemask, int type) 149 { 150 this->file = file; 151 this->index = 0; 152 this->writemask = writemask; 153 this->cond_mask = COND_TR; 154 this->reladdr = NULL; 155 this->type = type; 156 } 157 158 st_dst_reg() 159 { 160 this->type = GLSL_TYPE_ERROR; 161 this->file = PROGRAM_UNDEFINED; 162 this->index = 0; 163 this->writemask = 0; 164 this->cond_mask = COND_TR; 165 this->reladdr = NULL; 166 } 167 168 explicit st_dst_reg(st_src_reg reg); 169 170 gl_register_file file; /**< PROGRAM_* from Mesa */ 171 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 172 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 173 GLuint cond_mask:4; 174 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ 175 /** Register index should be offset by the integer in this reg. */ 176 st_src_reg *reladdr; 177}; 178 179st_src_reg::st_src_reg(st_dst_reg reg) 180{ 181 this->type = reg.type; 182 this->file = reg.file; 183 this->index = reg.index; 184 this->swizzle = SWIZZLE_XYZW; 185 this->negate = 0; 186 this->reladdr = reg.reladdr; 187} 188 189st_dst_reg::st_dst_reg(st_src_reg reg) 190{ 191 this->type = reg.type; 192 this->file = reg.file; 193 this->index = reg.index; 194 this->writemask = WRITEMASK_XYZW; 195 this->cond_mask = COND_TR; 196 this->reladdr = reg.reladdr; 197} 198 199class glsl_to_tgsi_instruction : public exec_node { 200public: 201 /* Callers of this ralloc-based new need not call delete. It's 202 * easier to just ralloc_free 'ctx' (or any of its ancestors). 
*/ 203 static void* operator new(size_t size, void *ctx) 204 { 205 void *node; 206 207 node = rzalloc_size(ctx, size); 208 assert(node != NULL); 209 210 return node; 211 } 212 213 unsigned op; 214 st_dst_reg dst; 215 st_src_reg src[3]; 216 /** Pointer to the ir source this tree came from for debugging */ 217 ir_instruction *ir; 218 GLboolean cond_update; 219 bool saturate; 220 int sampler; /**< sampler index */ 221 int tex_target; /**< One of TEXTURE_*_INDEX */ 222 GLboolean tex_shadow; 223 struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; 224 unsigned tex_offset_num_offset; 225 int dead_mask; /**< Used in dead code elimination */ 226 227 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ 228}; 229 230class variable_storage : public exec_node { 231public: 232 variable_storage(ir_variable *var, gl_register_file file, int index) 233 : file(file), index(index), var(var) 234 { 235 /* empty */ 236 } 237 238 gl_register_file file; 239 int index; 240 ir_variable *var; /* variable that maps to this, if any */ 241}; 242 243class immediate_storage : public exec_node { 244public: 245 immediate_storage(gl_constant_value *values, int size, int type) 246 { 247 memcpy(this->values, values, size * sizeof(gl_constant_value)); 248 this->size = size; 249 this->type = type; 250 } 251 252 gl_constant_value values[4]; 253 int size; /**< Number of components (1-4) */ 254 int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ 255}; 256 257class function_entry : public exec_node { 258public: 259 ir_function_signature *sig; 260 261 /** 262 * identifier of this function signature used by the program. 263 * 264 * At the point that TGSI instructions for function calls are 265 * generated, we don't know the address of the first instruction of 266 * the function body. So we make the BranchTarget that is called a 267 * small integer and rewrite them during set_branchtargets(). 
268 */ 269 int sig_id; 270 271 /** 272 * Pointer to first instruction of the function body. 273 * 274 * Set during function body emits after main() is processed. 275 */ 276 glsl_to_tgsi_instruction *bgn_inst; 277 278 /** 279 * Index of the first instruction of the function body in actual TGSI. 280 * 281 * Set after conversion from glsl_to_tgsi_instruction to TGSI. 282 */ 283 int inst; 284 285 /** Storage for the return value. */ 286 st_src_reg return_reg; 287}; 288 289class glsl_to_tgsi_visitor : public ir_visitor { 290public: 291 glsl_to_tgsi_visitor(); 292 ~glsl_to_tgsi_visitor(); 293 294 function_entry *current_function; 295 296 struct gl_context *ctx; 297 struct gl_program *prog; 298 struct gl_shader_program *shader_program; 299 struct gl_shader_compiler_options *options; 300 301 int next_temp; 302 303 int num_address_regs; 304 int samplers_used; 305 bool indirect_addr_temps; 306 bool indirect_addr_consts; 307 int num_clip_distances; 308 309 int glsl_version; 310 bool native_integers; 311 312 variable_storage *find_variable_storage(ir_variable *var); 313 314 int add_constant(gl_register_file file, gl_constant_value values[4], 315 int size, int datatype, GLuint *swizzle_out); 316 317 function_entry *get_function_signature(ir_function_signature *sig); 318 319 st_src_reg get_temp(const glsl_type *type); 320 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); 321 322 st_src_reg st_src_reg_for_float(float val); 323 st_src_reg st_src_reg_for_int(int val); 324 st_src_reg st_src_reg_for_type(int type, int val); 325 326 /** 327 * \name Visit methods 328 * 329 * As typical for the visitor pattern, there must be one \c visit method for 330 * each concrete subclass of \c ir_instruction. Virtual base classes within 331 * the hierarchy should not have \c visit methods. 
332 */ 333 /*@{*/ 334 virtual void visit(ir_variable *); 335 virtual void visit(ir_loop *); 336 virtual void visit(ir_loop_jump *); 337 virtual void visit(ir_function_signature *); 338 virtual void visit(ir_function *); 339 virtual void visit(ir_expression *); 340 virtual void visit(ir_swizzle *); 341 virtual void visit(ir_dereference_variable *); 342 virtual void visit(ir_dereference_array *); 343 virtual void visit(ir_dereference_record *); 344 virtual void visit(ir_assignment *); 345 virtual void visit(ir_constant *); 346 virtual void visit(ir_call *); 347 virtual void visit(ir_return *); 348 virtual void visit(ir_discard *); 349 virtual void visit(ir_texture *); 350 virtual void visit(ir_if *); 351 /*@}*/ 352 353 st_src_reg result; 354 355 /** List of variable_storage */ 356 exec_list variables; 357 358 /** List of immediate_storage */ 359 exec_list immediates; 360 unsigned num_immediates; 361 362 /** List of function_entry */ 363 exec_list function_signatures; 364 int next_signature_id; 365 366 /** List of glsl_to_tgsi_instruction */ 367 exec_list instructions; 368 369 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); 370 371 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 372 st_dst_reg dst, st_src_reg src0); 373 374 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 375 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 376 377 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, 378 st_dst_reg dst, 379 st_src_reg src0, st_src_reg src1, st_src_reg src2); 380 381 unsigned get_opcode(ir_instruction *ir, unsigned op, 382 st_dst_reg dst, 383 st_src_reg src0, st_src_reg src1); 384 385 /** 386 * Emit the correct dot-product instruction for the type of arguments 387 */ 388 glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, 389 st_dst_reg dst, 390 st_src_reg src0, 391 st_src_reg src1, 392 unsigned elements); 393 394 void emit_scalar(ir_instruction *ir, unsigned op, 395 st_dst_reg dst, st_src_reg src0); 396 
397 void emit_scalar(ir_instruction *ir, unsigned op, 398 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 399 400 void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst); 401 402 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); 403 404 void emit_scs(ir_instruction *ir, unsigned op, 405 st_dst_reg dst, const st_src_reg &src); 406 407 bool try_emit_mad(ir_expression *ir, 408 int mul_operand); 409 bool try_emit_mad_for_and_not(ir_expression *ir, 410 int mul_operand); 411 bool try_emit_sat(ir_expression *ir); 412 413 void emit_swz(ir_expression *ir); 414 415 bool process_move_condition(ir_rvalue *ir); 416 417 void simplify_cmp(void); 418 419 void rename_temp_register(int index, int new_index); 420 int get_first_temp_read(int index); 421 int get_first_temp_write(int index); 422 int get_last_temp_read(int index); 423 int get_last_temp_write(int index); 424 425 void copy_propagate(void); 426 void eliminate_dead_code(void); 427 int eliminate_dead_code_advanced(void); 428 void merge_registers(void); 429 void renumber_registers(void); 430 431 void *mem_ctx; 432}; 433 434static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); 435 436static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); 437 438static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); 439 440static void 441fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); 442 443static void 444fail_link(struct gl_shader_program *prog, const char *fmt, ...) 
445{ 446 va_list args; 447 va_start(args, fmt); 448 ralloc_vasprintf_append(&prog->InfoLog, fmt, args); 449 va_end(args); 450 451 prog->LinkStatus = GL_FALSE; 452} 453 454static int 455swizzle_for_size(int size) 456{ 457 int size_swizzles[4] = { 458 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 459 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 460 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 461 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 462 }; 463 464 assert((size >= 1) && (size <= 4)); 465 return size_swizzles[size - 1]; 466} 467 468static bool 469is_tex_instruction(unsigned opcode) 470{ 471 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 472 return info->is_tex; 473} 474 475static unsigned 476num_inst_dst_regs(unsigned opcode) 477{ 478 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 479 return info->num_dst; 480} 481 482static unsigned 483num_inst_src_regs(unsigned opcode) 484{ 485 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); 486 return info->is_tex ? info->num_src - 1 : info->num_src; 487} 488 489glsl_to_tgsi_instruction * 490glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 491 st_dst_reg dst, 492 st_src_reg src0, st_src_reg src1, st_src_reg src2) 493{ 494 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); 495 int num_reladdr = 0, i; 496 497 op = get_opcode(ir, op, dst, src0, src1); 498 499 /* If we have to do relative addressing, we want to load the ARL 500 * reg directly for one of the regs, and preload the other reladdr 501 * sources into temps. 
502 */ 503 num_reladdr += dst.reladdr != NULL; 504 num_reladdr += src0.reladdr != NULL; 505 num_reladdr += src1.reladdr != NULL; 506 num_reladdr += src2.reladdr != NULL; 507 508 reladdr_to_temp(ir, &src2, &num_reladdr); 509 reladdr_to_temp(ir, &src1, &num_reladdr); 510 reladdr_to_temp(ir, &src0, &num_reladdr); 511 512 if (dst.reladdr) { 513 emit_arl(ir, address_reg, *dst.reladdr); 514 num_reladdr--; 515 } 516 assert(num_reladdr == 0); 517 518 inst->op = op; 519 inst->dst = dst; 520 inst->src[0] = src0; 521 inst->src[1] = src1; 522 inst->src[2] = src2; 523 inst->ir = ir; 524 inst->dead_mask = 0; 525 526 inst->function = NULL; 527 528 if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL) 529 this->num_address_regs = 1; 530 531 /* Update indirect addressing status used by TGSI */ 532 if (dst.reladdr) { 533 switch(dst.file) { 534 case PROGRAM_TEMPORARY: 535 this->indirect_addr_temps = true; 536 break; 537 case PROGRAM_LOCAL_PARAM: 538 case PROGRAM_ENV_PARAM: 539 case PROGRAM_STATE_VAR: 540 case PROGRAM_NAMED_PARAM: 541 case PROGRAM_CONSTANT: 542 case PROGRAM_UNIFORM: 543 this->indirect_addr_consts = true; 544 break; 545 case PROGRAM_IMMEDIATE: 546 assert(!"immediates should not have indirect addressing"); 547 break; 548 default: 549 break; 550 } 551 } 552 else { 553 for (i=0; i<3; i++) { 554 if(inst->src[i].reladdr) { 555 switch(inst->src[i].file) { 556 case PROGRAM_TEMPORARY: 557 this->indirect_addr_temps = true; 558 break; 559 case PROGRAM_LOCAL_PARAM: 560 case PROGRAM_ENV_PARAM: 561 case PROGRAM_STATE_VAR: 562 case PROGRAM_NAMED_PARAM: 563 case PROGRAM_CONSTANT: 564 case PROGRAM_UNIFORM: 565 this->indirect_addr_consts = true; 566 break; 567 case PROGRAM_IMMEDIATE: 568 assert(!"immediates should not have indirect addressing"); 569 break; 570 default: 571 break; 572 } 573 } 574 } 575 } 576 577 this->instructions.push_tail(inst); 578 579 if (native_integers) 580 try_emit_float_set(ir, op, dst); 581 582 return inst; 583} 584 585 586glsl_to_tgsi_instruction * 
587glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 588 st_dst_reg dst, st_src_reg src0, st_src_reg src1) 589{ 590 return emit(ir, op, dst, src0, src1, undef_src); 591} 592 593glsl_to_tgsi_instruction * 594glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, 595 st_dst_reg dst, st_src_reg src0) 596{ 597 assert(dst.writemask != 0); 598 return emit(ir, op, dst, src0, undef_src, undef_src); 599} 600 601glsl_to_tgsi_instruction * 602glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) 603{ 604 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 605} 606 607 /** 608 * Emits the code to convert the result of float SET instructions to integers. 609 */ 610void 611glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op, 612 st_dst_reg dst) 613{ 614 if ((op == TGSI_OPCODE_SEQ || 615 op == TGSI_OPCODE_SNE || 616 op == TGSI_OPCODE_SGE || 617 op == TGSI_OPCODE_SLT)) 618 { 619 st_src_reg src = st_src_reg(dst); 620 src.negate = ~src.negate; 621 dst.type = GLSL_TYPE_FLOAT; 622 emit(ir, TGSI_OPCODE_F2I, dst, src); 623 } 624} 625 626/** 627 * Determines whether to use an integer, unsigned integer, or float opcode 628 * based on the operands and input opcode, then emits the result. 629 */ 630unsigned 631glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, 632 st_dst_reg dst, 633 st_src_reg src0, st_src_reg src1) 634{ 635 int type = GLSL_TYPE_FLOAT; 636 637 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) 638 type = GLSL_TYPE_FLOAT; 639 else if (native_integers) 640 type = src0.type == GLSL_TYPE_BOOL ? 
GLSL_TYPE_INT : src0.type; 641 642#define case4(c, f, i, u) \ 643 case TGSI_OPCODE_##c: \ 644 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ 645 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ 646 else op = TGSI_OPCODE_##f; \ 647 break; 648#define case3(f, i, u) case4(f, f, i, u) 649#define case2fi(f, i) case4(f, f, i, i) 650#define case2iu(i, u) case4(i, LAST, i, u) 651 652 switch(op) { 653 case2fi(ADD, UADD); 654 case2fi(MUL, UMUL); 655 case2fi(MAD, UMAD); 656 case3(DIV, IDIV, UDIV); 657 case3(MAX, IMAX, UMAX); 658 case3(MIN, IMIN, UMIN); 659 case2iu(MOD, UMOD); 660 661 case2fi(SEQ, USEQ); 662 case2fi(SNE, USNE); 663 case3(SGE, ISGE, USGE); 664 case3(SLT, ISLT, USLT); 665 666 case2iu(ISHR, USHR); 667 668 case2fi(SSG, ISSG); 669 case3(ABS, IABS, IABS); 670 671 default: break; 672 } 673 674 assert(op != TGSI_OPCODE_LAST); 675 return op; 676} 677 678glsl_to_tgsi_instruction * 679glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, 680 st_dst_reg dst, st_src_reg src0, st_src_reg src1, 681 unsigned elements) 682{ 683 static const unsigned dot_opcodes[] = { 684 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 685 }; 686 687 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 688} 689 690/** 691 * Emits TGSI scalar opcodes to produce unique answers across channels. 692 * 693 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X 694 * channel determines the result across all channels. So to do a vec4 695 * of this operation, we want to emit a scalar per source channel used 696 * to produce dest channels. 697 */ 698void 699glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 700 st_dst_reg dst, 701 st_src_reg orig_src0, st_src_reg orig_src1) 702{ 703 int i, j; 704 int done_mask = ~dst.writemask; 705 706 /* TGSI RCP is a scalar operation splatting results to all channels, 707 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 708 * dst channels. 
709 */ 710 for (i = 0; i < 4; i++) { 711 GLuint this_mask = (1 << i); 712 glsl_to_tgsi_instruction *inst; 713 st_src_reg src0 = orig_src0; 714 st_src_reg src1 = orig_src1; 715 716 if (done_mask & this_mask) 717 continue; 718 719 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 720 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 721 for (j = i + 1; j < 4; j++) { 722 /* If there is another enabled component in the destination that is 723 * derived from the same inputs, generate its value on this pass as 724 * well. 725 */ 726 if (!(done_mask & (1 << j)) && 727 GET_SWZ(src0.swizzle, j) == src0_swiz && 728 GET_SWZ(src1.swizzle, j) == src1_swiz) { 729 this_mask |= (1 << j); 730 } 731 } 732 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 733 src0_swiz, src0_swiz); 734 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 735 src1_swiz, src1_swiz); 736 737 inst = emit(ir, op, dst, src0, src1); 738 inst->dst.writemask = this_mask; 739 done_mask |= this_mask; 740 } 741} 742 743void 744glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 745 st_dst_reg dst, st_src_reg src0) 746{ 747 st_src_reg undef = undef_src; 748 749 undef.swizzle = SWIZZLE_XXXX; 750 751 emit_scalar(ir, op, dst, src0, undef); 752} 753 754void 755glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 756 st_dst_reg dst, st_src_reg src0) 757{ 758 int op = TGSI_OPCODE_ARL; 759 760 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) 761 op = TGSI_OPCODE_UARL; 762 763 emit(NULL, op, dst, src0); 764} 765 766/** 767 * Emit an TGSI_OPCODE_SCS instruction 768 * 769 * The \c SCS opcode functions a bit differently than the other TGSI opcodes. 770 * Instead of splatting its result across all four components of the 771 * destination, it writes one value to the \c x component and another value to 772 * the \c y component. 773 * 774 * \param ir IR instruction being processed 775 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 776 * on which value is desired. 
777 * \param dst Destination register 778 * \param src Source register 779 */ 780void 781glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, 782 st_dst_reg dst, 783 const st_src_reg &src) 784{ 785 /* Vertex programs cannot use the SCS opcode. 786 */ 787 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { 788 emit_scalar(ir, op, dst, src); 789 return; 790 } 791 792 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; 793 const unsigned scs_mask = (1U << component); 794 int done_mask = ~dst.writemask; 795 st_src_reg tmp; 796 797 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); 798 799 /* If there are compnents in the destination that differ from the component 800 * that will be written by the SCS instrution, we'll need a temporary. 801 */ 802 if (scs_mask != unsigned(dst.writemask)) { 803 tmp = get_temp(glsl_type::vec4_type); 804 } 805 806 for (unsigned i = 0; i < 4; i++) { 807 unsigned this_mask = (1U << i); 808 st_src_reg src0 = src; 809 810 if ((done_mask & this_mask) != 0) 811 continue; 812 813 /* The source swizzle specified which component of the source generates 814 * sine / cosine for the current component in the destination. The SCS 815 * instruction requires that this value be swizzle to the X component. 816 * Replace the current swizzle with a swizzle that puts the source in 817 * the X component. 818 */ 819 unsigned src0_swiz = GET_SWZ(src.swizzle, i); 820 821 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 822 src0_swiz, src0_swiz); 823 for (unsigned j = i + 1; j < 4; j++) { 824 /* If there is another enabled component in the destination that is 825 * derived from the same inputs, generate its value on this pass as 826 * well. 827 */ 828 if (!(done_mask & (1 << j)) && 829 GET_SWZ(src0.swizzle, j) == src0_swiz) { 830 this_mask |= (1 << j); 831 } 832 } 833 834 if (this_mask != scs_mask) { 835 glsl_to_tgsi_instruction *inst; 836 st_dst_reg tmp_dst = st_dst_reg(tmp); 837 838 /* Emit the SCS instruction. 
839 */ 840 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); 841 inst->dst.writemask = scs_mask; 842 843 /* Move the result of the SCS instruction to the desired location in 844 * the destination. 845 */ 846 tmp.swizzle = MAKE_SWIZZLE4(component, component, 847 component, component); 848 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); 849 inst->dst.writemask = this_mask; 850 } else { 851 /* Emit the SCS instruction to write directly to the destination. 852 */ 853 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); 854 inst->dst.writemask = scs_mask; 855 } 856 857 done_mask |= this_mask; 858 } 859} 860 861int 862glsl_to_tgsi_visitor::add_constant(gl_register_file file, 863 gl_constant_value values[4], int size, int datatype, 864 GLuint *swizzle_out) 865{ 866 if (file == PROGRAM_CONSTANT) { 867 return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, 868 size, datatype, swizzle_out); 869 } else { 870 int index = 0; 871 immediate_storage *entry; 872 assert(file == PROGRAM_IMMEDIATE); 873 874 /* Search immediate storage to see if we already have an identical 875 * immediate that we can use instead of adding a duplicate entry. 876 */ 877 foreach_iter(exec_list_iterator, iter, this->immediates) { 878 entry = (immediate_storage *)iter.get(); 879 880 if (entry->size == size && 881 entry->type == datatype && 882 !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { 883 return index; 884 } 885 index++; 886 } 887 888 /* Add this immediate to the list. 
*/ 889 entry = new(mem_ctx) immediate_storage(values, size, datatype); 890 this->immediates.push_tail(entry); 891 this->num_immediates++; 892 return index; 893 } 894} 895 896st_src_reg 897glsl_to_tgsi_visitor::st_src_reg_for_float(float val) 898{ 899 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); 900 union gl_constant_value uval; 901 902 uval.f = val; 903 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); 904 905 return src; 906} 907 908st_src_reg 909glsl_to_tgsi_visitor::st_src_reg_for_int(int val) 910{ 911 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); 912 union gl_constant_value uval; 913 914 assert(native_integers); 915 916 uval.i = val; 917 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); 918 919 return src; 920} 921 922st_src_reg 923glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) 924{ 925 if (native_integers) 926 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 927 st_src_reg_for_int(val); 928 else 929 return st_src_reg_for_float(val); 930} 931 932static int 933type_size(const struct glsl_type *type) 934{ 935 unsigned int i; 936 int size; 937 938 switch (type->base_type) { 939 case GLSL_TYPE_UINT: 940 case GLSL_TYPE_INT: 941 case GLSL_TYPE_FLOAT: 942 case GLSL_TYPE_BOOL: 943 if (type->is_matrix()) { 944 return type->matrix_columns; 945 } else { 946 /* Regardless of size of vector, it gets a vec4. This is bad 947 * packing for things like floats, but otherwise arrays become a 948 * mess. Hopefully a later pass over the code can pack scalars 949 * down if appropriate. 950 */ 951 return 1; 952 } 953 case GLSL_TYPE_ARRAY: 954 assert(type->length > 0); 955 return type_size(type->fields.array) * type->length; 956 case GLSL_TYPE_STRUCT: 957 size = 0; 958 for (i = 0; i < type->length; i++) { 959 size += type_size(type->fields.structure[i].type); 960 } 961 return size; 962 case GLSL_TYPE_SAMPLER: 963 /* Samplers take up one slot in UNIFORMS[], but they're baked in 964 * at link time. 
965 */ 966 return 1; 967 default: 968 assert(0); 969 return 0; 970 } 971} 972 973/** 974 * In the initial pass of codegen, we assign temporary numbers to 975 * intermediate results. (not SSA -- variable assignments will reuse 976 * storage). 977 */ 978st_src_reg 979glsl_to_tgsi_visitor::get_temp(const glsl_type *type) 980{ 981 st_src_reg src; 982 983 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; 984 src.file = PROGRAM_TEMPORARY; 985 src.index = next_temp; 986 src.reladdr = NULL; 987 next_temp += type_size(type); 988 989 if (type->is_array() || type->is_record()) { 990 src.swizzle = SWIZZLE_NOOP; 991 } else { 992 src.swizzle = swizzle_for_size(type->vector_elements); 993 } 994 src.negate = 0; 995 996 return src; 997} 998 999variable_storage * 1000glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 1001{ 1002 1003 variable_storage *entry; 1004 1005 foreach_iter(exec_list_iterator, iter, this->variables) { 1006 entry = (variable_storage *)iter.get(); 1007 1008 if (entry->var == var) 1009 return entry; 1010 } 1011 1012 return NULL; 1013} 1014 1015void 1016glsl_to_tgsi_visitor::visit(ir_variable *ir) 1017{ 1018 if (strcmp(ir->name, "gl_FragCoord") == 0) { 1019 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 1020 1021 fp->OriginUpperLeft = ir->origin_upper_left; 1022 fp->PixelCenterInteger = ir->pixel_center_integer; 1023 } 1024 1025 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 1026 unsigned int i; 1027 const ir_state_slot *const slots = ir->state_slots; 1028 assert(ir->state_slots != NULL); 1029 1030 /* Check if this statevar's setup in the STATE file exactly 1031 * matches how we'll want to reference it as a 1032 * struct/array/whatever. If not, then we need to move it into 1033 * temporary storage and hope that it'll get copy-propagated 1034 * out. 
1035 */ 1036 for (i = 0; i < ir->num_state_slots; i++) { 1037 if (slots[i].swizzle != SWIZZLE_XYZW) { 1038 break; 1039 } 1040 } 1041 1042 variable_storage *storage; 1043 st_dst_reg dst; 1044 if (i == ir->num_state_slots) { 1045 /* We'll set the index later. */ 1046 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 1047 this->variables.push_tail(storage); 1048 1049 dst = undef_dst; 1050 } else { 1051 /* The variable_storage constructor allocates slots based on the size 1052 * of the type. However, this had better match the number of state 1053 * elements that we're going to copy into the new temporary. 1054 */ 1055 assert((int) ir->num_state_slots == type_size(ir->type)); 1056 1057 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 1058 this->next_temp); 1059 this->variables.push_tail(storage); 1060 this->next_temp += type_size(ir->type); 1061 1062 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, 1063 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); 1064 } 1065 1066 1067 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 1068 int index = _mesa_add_state_reference(this->prog->Parameters, 1069 (gl_state_index *)slots[i].tokens); 1070 1071 if (storage->file == PROGRAM_STATE_VAR) { 1072 if (storage->index == -1) { 1073 storage->index = index; 1074 } else { 1075 assert(index == storage->index + (int)i); 1076 } 1077 } else { 1078 st_src_reg src(PROGRAM_STATE_VAR, index, 1079 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); 1080 src.swizzle = slots[i].swizzle; 1081 emit(ir, TGSI_OPCODE_MOV, dst, src); 1082 /* even a float takes up a whole vec4 reg in a struct/array. 
*/ 1083 dst.index++; 1084 } 1085 } 1086 1087 if (storage->file == PROGRAM_TEMPORARY && 1088 dst.index != storage->index + (int) ir->num_state_slots) { 1089 fail_link(this->shader_program, 1090 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1091 ir->name, dst.index - storage->index, 1092 type_size(ir->type)); 1093 } 1094 } 1095} 1096 1097void 1098glsl_to_tgsi_visitor::visit(ir_loop *ir) 1099{ 1100 ir_dereference_variable *counter = NULL; 1101 1102 if (ir->counter != NULL) 1103 counter = new(ir) ir_dereference_variable(ir->counter); 1104 1105 if (ir->from != NULL) { 1106 assert(ir->counter != NULL); 1107 1108 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 1109 1110 a->accept(this); 1111 delete a; 1112 } 1113 1114 emit(NULL, TGSI_OPCODE_BGNLOOP); 1115 1116 if (ir->to) { 1117 ir_expression *e = 1118 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 1119 counter, ir->to); 1120 ir_if *if_stmt = new(ir) ir_if(e); 1121 1122 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 1123 1124 if_stmt->then_instructions.push_tail(brk); 1125 1126 if_stmt->accept(this); 1127 1128 delete if_stmt; 1129 delete e; 1130 delete brk; 1131 } 1132 1133 visit_exec_list(&ir->body_instructions, this); 1134 1135 if (ir->increment) { 1136 ir_expression *e = 1137 new(ir) ir_expression(ir_binop_add, counter->type, 1138 counter, ir->increment); 1139 1140 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 1141 1142 a->accept(this); 1143 delete a; 1144 delete e; 1145 } 1146 1147 emit(NULL, TGSI_OPCODE_ENDLOOP); 1148} 1149 1150void 1151glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 1152{ 1153 switch (ir->mode) { 1154 case ir_loop_jump::jump_break: 1155 emit(NULL, TGSI_OPCODE_BRK); 1156 break; 1157 case ir_loop_jump::jump_continue: 1158 emit(NULL, TGSI_OPCODE_CONT); 1159 break; 1160 } 1161} 1162 1163 1164void 1165glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1166{ 1167 assert(0); 1168 (void)ir; 1169} 1170 1171void 
1172glsl_to_tgsi_visitor::visit(ir_function *ir) 1173{ 1174 /* Ignore function bodies other than main() -- we shouldn't see calls to 1175 * them since they should all be inlined before we get to glsl_to_tgsi. 1176 */ 1177 if (strcmp(ir->name, "main") == 0) { 1178 const ir_function_signature *sig; 1179 exec_list empty; 1180 1181 sig = ir->matching_signature(&empty); 1182 1183 assert(sig); 1184 1185 foreach_iter(exec_list_iterator, iter, sig->body) { 1186 ir_instruction *ir = (ir_instruction *)iter.get(); 1187 1188 ir->accept(this); 1189 } 1190 } 1191} 1192 1193bool 1194glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1195{ 1196 int nonmul_operand = 1 - mul_operand; 1197 st_src_reg a, b, c; 1198 st_dst_reg result_dst; 1199 1200 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1201 if (!expr || expr->operation != ir_binop_mul) 1202 return false; 1203 1204 expr->operands[0]->accept(this); 1205 a = this->result; 1206 expr->operands[1]->accept(this); 1207 b = this->result; 1208 ir->operands[nonmul_operand]->accept(this); 1209 c = this->result; 1210 1211 this->result = get_temp(ir->type); 1212 result_dst = st_dst_reg(this->result); 1213 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1214 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1215 1216 return true; 1217} 1218 1219/** 1220 * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) 1221 * 1222 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 1223 * implemented using multiplication, and logical-or is implemented using 1224 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 1225 * As result, the logical expression (a & !b) can be rewritten as: 1226 * 1227 * - a * !b 1228 * - a * (1 - b) 1229 * - (a * 1) - (a * b) 1230 * - a + -(a * b) 1231 * - a + (a * -b) 1232 * 1233 * This final expression can be implemented as a single MAD(a, -b, a) 1234 * instruction. 
1235 */ 1236bool 1237glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 1238{ 1239 const int other_operand = 1 - try_operand; 1240 st_src_reg a, b; 1241 1242 ir_expression *expr = ir->operands[try_operand]->as_expression(); 1243 if (!expr || expr->operation != ir_unop_logic_not) 1244 return false; 1245 1246 ir->operands[other_operand]->accept(this); 1247 a = this->result; 1248 expr->operands[0]->accept(this); 1249 b = this->result; 1250 1251 b.negate = ~b.negate; 1252 1253 this->result = get_temp(ir->type); 1254 emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); 1255 1256 return true; 1257} 1258 1259bool 1260glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) 1261{ 1262 /* Saturates were only introduced to vertex programs in 1263 * NV_vertex_program3, so don't give them to drivers in the VP. 1264 */ 1265 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) 1266 return false; 1267 1268 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 1269 if (!sat_src) 1270 return false; 1271 1272 sat_src->accept(this); 1273 st_src_reg src = this->result; 1274 1275 /* If we generated an expression instruction into a temporary in 1276 * processing the saturate's operand, apply the saturate to that 1277 * instruction. Otherwise, generate a MOV to do the saturate. 1278 * 1279 * Note that we have to be careful to only do this optimization if 1280 * the instruction in question was what generated src->result. For 1281 * example, ir_dereference_array might generate a MUL instruction 1282 * to create the reladdr, and return us a src reg using that 1283 * reladdr. That MUL result is not the value we're trying to 1284 * saturate. 
1285 */ 1286 ir_expression *sat_src_expr = sat_src->as_expression(); 1287 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || 1288 sat_src_expr->operation == ir_binop_add || 1289 sat_src_expr->operation == ir_binop_dot)) { 1290 glsl_to_tgsi_instruction *new_inst; 1291 new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 1292 new_inst->saturate = true; 1293 } else { 1294 this->result = get_temp(ir->type); 1295 st_dst_reg result_dst = st_dst_reg(this->result); 1296 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1297 glsl_to_tgsi_instruction *inst; 1298 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); 1299 inst->saturate = true; 1300 } 1301 1302 return true; 1303} 1304 1305void 1306glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1307 st_src_reg *reg, int *num_reladdr) 1308{ 1309 if (!reg->reladdr) 1310 return; 1311 1312 emit_arl(ir, address_reg, *reg->reladdr); 1313 1314 if (*num_reladdr != 1) { 1315 st_src_reg temp = get_temp(glsl_type::vec4_type); 1316 1317 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); 1318 *reg = temp; 1319 } 1320 1321 (*num_reladdr)--; 1322} 1323 1324void 1325glsl_to_tgsi_visitor::visit(ir_expression *ir) 1326{ 1327 unsigned int operand; 1328 st_src_reg op[Elements(ir->operands)]; 1329 st_src_reg result_src; 1330 st_dst_reg result_dst; 1331 1332 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) 1333 */ 1334 if (ir->operation == ir_binop_add) { 1335 if (try_emit_mad(ir, 1)) 1336 return; 1337 if (try_emit_mad(ir, 0)) 1338 return; 1339 } 1340 1341 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 1342 */ 1343 if (ir->operation == ir_binop_logic_and) { 1344 if (try_emit_mad_for_and_not(ir, 1)) 1345 return; 1346 if (try_emit_mad_for_and_not(ir, 0)) 1347 return; 1348 } 1349 1350 if (try_emit_sat(ir)) 1351 return; 1352 1353 if (ir->operation == ir_quadop_vector) 1354 assert(!"ir_quadop_vector should have been lowered"); 1355 1356 for (operand = 0; operand < 
ir->get_num_operands(); operand++) { 1357 this->result.file = PROGRAM_UNDEFINED; 1358 ir->operands[operand]->accept(this); 1359 if (this->result.file == PROGRAM_UNDEFINED) { 1360 ir_print_visitor v; 1361 printf("Failed to get tree for expression operand:\n"); 1362 ir->operands[operand]->accept(&v); 1363 exit(1); 1364 } 1365 op[operand] = this->result; 1366 1367 /* Matrix expression operands should have been broken down to vector 1368 * operations already. 1369 */ 1370 assert(!ir->operands[operand]->type->is_matrix()); 1371 } 1372 1373 int vector_elements = ir->operands[0]->type->vector_elements; 1374 if (ir->operands[1]) { 1375 vector_elements = MAX2(vector_elements, 1376 ir->operands[1]->type->vector_elements); 1377 } 1378 1379 this->result.file = PROGRAM_UNDEFINED; 1380 1381 /* Storage for our result. Ideally for an assignment we'd be using 1382 * the actual storage for the result here, instead. 1383 */ 1384 result_src = get_temp(ir->type); 1385 /* convenience for the emit functions below. */ 1386 result_dst = st_dst_reg(result_src); 1387 /* Limit writes to the channels that will be used by result_src later. 1388 * This does limit this temp's use as a temporary for multi-instruction 1389 * sequences. 1390 */ 1391 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1392 1393 switch (ir->operation) { 1394 case ir_unop_logic_not: 1395 if (result_dst.type != GLSL_TYPE_FLOAT) 1396 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1397 else { 1398 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 1399 * older GPUs implement SEQ using multiple instructions (i915 uses two 1400 * SGE instructions and a MUL instruction). Since our logic values are 1401 * 0.0 and 1.0, 1-x also implements !x. 
1402 */ 1403 op[0].negate = ~op[0].negate; 1404 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); 1405 } 1406 break; 1407 case ir_unop_neg: 1408 if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) 1409 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1410 else { 1411 op[0].negate = ~op[0].negate; 1412 result_src = op[0]; 1413 } 1414 break; 1415 case ir_unop_abs: 1416 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); 1417 break; 1418 case ir_unop_sign: 1419 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); 1420 break; 1421 case ir_unop_rcp: 1422 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); 1423 break; 1424 1425 case ir_unop_exp2: 1426 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); 1427 break; 1428 case ir_unop_exp: 1429 case ir_unop_log: 1430 assert(!"not reached: should be handled by ir_explog_to_explog2"); 1431 break; 1432 case ir_unop_log2: 1433 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); 1434 break; 1435 case ir_unop_sin: 1436 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1437 break; 1438 case ir_unop_cos: 1439 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1440 break; 1441 case ir_unop_sin_reduced: 1442 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1443 break; 1444 case ir_unop_cos_reduced: 1445 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1446 break; 1447 1448 case ir_unop_dFdx: 1449 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); 1450 break; 1451 case ir_unop_dFdy: 1452 op[0].negate = ~op[0].negate; 1453 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); 1454 break; 1455 1456 case ir_unop_noise: { 1457 /* At some point, a motivated person could add a better 1458 * implementation of noise. Currently not even the nvidia 1459 * binary drivers do anything more than this. In any case, the 1460 * place to do this is in the GL state tracker, not the poor 1461 * driver. 
1462 */ 1463 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); 1464 break; 1465 } 1466 1467 case ir_binop_add: 1468 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1469 break; 1470 case ir_binop_sub: 1471 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); 1472 break; 1473 1474 case ir_binop_mul: 1475 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1476 break; 1477 case ir_binop_div: 1478 if (result_dst.type == GLSL_TYPE_FLOAT) 1479 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1480 else 1481 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); 1482 break; 1483 case ir_binop_mod: 1484 if (result_dst.type == GLSL_TYPE_FLOAT) 1485 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1486 else 1487 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); 1488 break; 1489 1490 case ir_binop_less: 1491 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); 1492 break; 1493 case ir_binop_greater: 1494 emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); 1495 break; 1496 case ir_binop_lequal: 1497 emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); 1498 break; 1499 case ir_binop_gequal: 1500 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); 1501 break; 1502 case ir_binop_equal: 1503 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1504 break; 1505 case ir_binop_nequal: 1506 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1507 break; 1508 case ir_binop_all_equal: 1509 /* "==" operator producing a scalar boolean. */ 1510 if (ir->operands[0]->type->is_vector() || 1511 ir->operands[1]->type->is_vector()) { 1512 st_src_reg temp = get_temp(native_integers ? 
1513 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1514 glsl_type::vec4_type); 1515 1516 if (native_integers) { 1517 st_dst_reg temp_dst = st_dst_reg(temp); 1518 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1519 1520 emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); 1521 1522 /* Emit 1-3 AND operations to combine the SEQ results. */ 1523 switch (ir->operands[0]->type->vector_elements) { 1524 case 2: 1525 break; 1526 case 3: 1527 temp_dst.writemask = WRITEMASK_Y; 1528 temp1.swizzle = SWIZZLE_YYYY; 1529 temp2.swizzle = SWIZZLE_ZZZZ; 1530 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1531 break; 1532 case 4: 1533 temp_dst.writemask = WRITEMASK_X; 1534 temp1.swizzle = SWIZZLE_XXXX; 1535 temp2.swizzle = SWIZZLE_YYYY; 1536 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1537 temp_dst.writemask = WRITEMASK_Y; 1538 temp1.swizzle = SWIZZLE_ZZZZ; 1539 temp2.swizzle = SWIZZLE_WWWW; 1540 emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1541 } 1542 1543 temp1.swizzle = SWIZZLE_XXXX; 1544 temp2.swizzle = SWIZZLE_YYYY; 1545 emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); 1546 } else { 1547 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1548 1549 /* After the dot-product, the value will be an integer on the 1550 * range [0,4]. Zero becomes 1.0, and positive values become zero. 1551 */ 1552 emit_dp(ir, result_dst, temp, temp, vector_elements); 1553 1554 /* Negating the result of the dot-product gives values on the range 1555 * [-4, 0]. Zero becomes 1.0, and negative values become zero. 1556 * This is achieved using SGE. 1557 */ 1558 st_src_reg sge_src = result_src; 1559 sge_src.negate = ~sge_src.negate; 1560 emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); 1561 } 1562 } else { 1563 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1564 } 1565 break; 1566 case ir_binop_any_nequal: 1567 /* "!=" operator producing a scalar boolean. 
*/ 1568 if (ir->operands[0]->type->is_vector() || 1569 ir->operands[1]->type->is_vector()) { 1570 st_src_reg temp = get_temp(native_integers ? 1571 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : 1572 glsl_type::vec4_type); 1573 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1574 1575 if (native_integers) { 1576 st_dst_reg temp_dst = st_dst_reg(temp); 1577 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1578 1579 /* Emit 1-3 OR operations to combine the SNE results. */ 1580 switch (ir->operands[0]->type->vector_elements) { 1581 case 2: 1582 break; 1583 case 3: 1584 temp_dst.writemask = WRITEMASK_Y; 1585 temp1.swizzle = SWIZZLE_YYYY; 1586 temp2.swizzle = SWIZZLE_ZZZZ; 1587 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1588 break; 1589 case 4: 1590 temp_dst.writemask = WRITEMASK_X; 1591 temp1.swizzle = SWIZZLE_XXXX; 1592 temp2.swizzle = SWIZZLE_YYYY; 1593 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1594 temp_dst.writemask = WRITEMASK_Y; 1595 temp1.swizzle = SWIZZLE_ZZZZ; 1596 temp2.swizzle = SWIZZLE_WWWW; 1597 emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1598 } 1599 1600 temp1.swizzle = SWIZZLE_XXXX; 1601 temp2.swizzle = SWIZZLE_YYYY; 1602 emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); 1603 } else { 1604 /* After the dot-product, the value will be an integer on the 1605 * range [0,4]. Zero stays zero, and positive values become 1.0. 1606 */ 1607 glsl_to_tgsi_instruction *const dp = 1608 emit_dp(ir, result_dst, temp, temp, vector_elements); 1609 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1610 /* The clamping to [0,1] can be done for free in the fragment 1611 * shader with a saturate. 1612 */ 1613 dp->saturate = true; 1614 } else { 1615 /* Negating the result of the dot-product gives values on the range 1616 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1617 * achieved using SLT. 
1618 */ 1619 st_src_reg slt_src = result_src; 1620 slt_src.negate = ~slt_src.negate; 1621 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1622 } 1623 } 1624 } else { 1625 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1626 } 1627 break; 1628 1629 case ir_unop_any: { 1630 assert(ir->operands[0]->type->is_vector()); 1631 1632 /* After the dot-product, the value will be an integer on the 1633 * range [0,4]. Zero stays zero, and positive values become 1.0. 1634 */ 1635 glsl_to_tgsi_instruction *const dp = 1636 emit_dp(ir, result_dst, op[0], op[0], 1637 ir->operands[0]->type->vector_elements); 1638 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 1639 result_dst.type == GLSL_TYPE_FLOAT) { 1640 /* The clamping to [0,1] can be done for free in the fragment 1641 * shader with a saturate. 1642 */ 1643 dp->saturate = true; 1644 } else if (result_dst.type == GLSL_TYPE_FLOAT) { 1645 /* Negating the result of the dot-product gives values on the range 1646 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1647 * is achieved using SLT. 1648 */ 1649 st_src_reg slt_src = result_src; 1650 slt_src.negate = ~slt_src.negate; 1651 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1652 } 1653 else { 1654 /* Use SNE 0 if integers are being used as boolean values. */ 1655 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); 1656 } 1657 break; 1658 } 1659 1660 case ir_binop_logic_xor: 1661 if (native_integers) 1662 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1663 else 1664 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1665 break; 1666 1667 case ir_binop_logic_or: { 1668 if (native_integers) { 1669 /* If integers are used as booleans, we can use an actual "or" 1670 * instruction. 1671 */ 1672 assert(native_integers); 1673 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1674 } else { 1675 /* After the addition, the value will be an integer on the 1676 * range [0,2]. 
Zero stays zero, and positive values become 1.0. 1677 */ 1678 glsl_to_tgsi_instruction *add = 1679 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1680 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1681 /* The clamping to [0,1] can be done for free in the fragment 1682 * shader with a saturate if floats are being used as boolean values. 1683 */ 1684 add->saturate = true; 1685 } else { 1686 /* Negating the result of the addition gives values on the range 1687 * [-2, 0]. Zero stays zero, and negative values become 1.0. This 1688 * is achieved using SLT. 1689 */ 1690 st_src_reg slt_src = result_src; 1691 slt_src.negate = ~slt_src.negate; 1692 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1693 } 1694 } 1695 break; 1696 } 1697 1698 case ir_binop_logic_and: 1699 /* If native integers are disabled, the bool args are stored as float 0.0 1700 * or 1.0, so "mul" gives us "and". If they're enabled, just use the 1701 * actual AND opcode. 1702 */ 1703 if (native_integers) 1704 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1705 else 1706 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1707 break; 1708 1709 case ir_binop_dot: 1710 assert(ir->operands[0]->type->is_vector()); 1711 assert(ir->operands[0]->type == ir->operands[1]->type); 1712 emit_dp(ir, result_dst, op[0], op[1], 1713 ir->operands[0]->type->vector_elements); 1714 break; 1715 1716 case ir_unop_sqrt: 1717 /* sqrt(x) = x * rsq(x). */ 1718 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1719 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); 1720 /* For incoming channels <= 0, set the result to 0. 
*/ 1721 op[0].negate = ~op[0].negate; 1722 emit(ir, TGSI_OPCODE_CMP, result_dst, 1723 op[0], result_src, st_src_reg_for_float(0.0)); 1724 break; 1725 case ir_unop_rsq: 1726 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1727 break; 1728 case ir_unop_i2f: 1729 if (native_integers) { 1730 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1731 break; 1732 } 1733 /* fallthrough to next case otherwise */ 1734 case ir_unop_b2f: 1735 if (native_integers) { 1736 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); 1737 break; 1738 } 1739 /* fallthrough to next case otherwise */ 1740 case ir_unop_i2u: 1741 case ir_unop_u2i: 1742 /* Converting between signed and unsigned integers is a no-op. */ 1743 result_src = op[0]; 1744 break; 1745 case ir_unop_b2i: 1746 if (native_integers) { 1747 /* Booleans are stored as integers using ~0 for true and 0 for false. 1748 * GLSL requires that int(bool) return 1 for true and 0 for false. 1749 * This conversion is done with AND, but it could be done with NEG. 1750 */ 1751 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); 1752 } else { 1753 /* Booleans and integers are both stored as floats when native 1754 * integers are disabled. 
1755 */ 1756 result_src = op[0]; 1757 } 1758 break; 1759 case ir_unop_f2i: 1760 if (native_integers) 1761 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1762 else 1763 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1764 break; 1765 case ir_unop_f2b: 1766 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1767 break; 1768 case ir_unop_i2b: 1769 if (native_integers) 1770 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1771 else 1772 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1773 break; 1774 case ir_unop_trunc: 1775 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1776 break; 1777 case ir_unop_ceil: 1778 emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); 1779 break; 1780 case ir_unop_floor: 1781 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1782 break; 1783 case ir_unop_round_even: 1784 emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); 1785 break; 1786 case ir_unop_fract: 1787 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 1788 break; 1789 1790 case ir_binop_min: 1791 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 1792 break; 1793 case ir_binop_max: 1794 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 1795 break; 1796 case ir_binop_pow: 1797 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 1798 break; 1799 1800 case ir_unop_bit_not: 1801 if (native_integers) { 1802 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1803 break; 1804 } 1805 case ir_unop_u2f: 1806 if (native_integers) { 1807 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 1808 break; 1809 } 1810 case ir_binop_lshift: 1811 if (native_integers) { 1812 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); 1813 break; 1814 } 1815 case ir_binop_rshift: 1816 if (native_integers) { 1817 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); 1818 break; 1819 } 1820 case ir_binop_bit_and: 1821 if (native_integers) { 1822 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1823 break; 1824 } 1825 case ir_binop_bit_xor: 1826 if (native_integers) { 1827 
emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1828 break; 1829 } 1830 case ir_binop_bit_or: 1831 if (native_integers) { 1832 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1833 break; 1834 } 1835 1836 assert(!"GLSL 1.30 features unsupported"); 1837 break; 1838 1839 case ir_quadop_vector: 1840 /* This operation should have already been handled. 1841 */ 1842 assert(!"Should not get here."); 1843 break; 1844 } 1845 1846 this->result = result_src; 1847} 1848 1849 1850void 1851glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 1852{ 1853 st_src_reg src; 1854 int i; 1855 int swizzle[4]; 1856 1857 /* Note that this is only swizzles in expressions, not those on the left 1858 * hand side of an assignment, which do write masking. See ir_assignment 1859 * for that. 1860 */ 1861 1862 ir->val->accept(this); 1863 src = this->result; 1864 assert(src.file != PROGRAM_UNDEFINED); 1865 1866 for (i = 0; i < 4; i++) { 1867 if (i < ir->type->vector_elements) { 1868 switch (i) { 1869 case 0: 1870 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1871 break; 1872 case 1: 1873 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1874 break; 1875 case 2: 1876 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1877 break; 1878 case 3: 1879 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1880 break; 1881 } 1882 } else { 1883 /* If the type is smaller than a vec4, replicate the last 1884 * channel out. 
1885 */ 1886 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1887 } 1888 } 1889 1890 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1891 1892 this->result = src; 1893} 1894 1895void 1896glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 1897{ 1898 variable_storage *entry = find_variable_storage(ir->var); 1899 ir_variable *var = ir->var; 1900 1901 if (!entry) { 1902 switch (var->mode) { 1903 case ir_var_uniform: 1904 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1905 var->location); 1906 this->variables.push_tail(entry); 1907 break; 1908 case ir_var_in: 1909 case ir_var_inout: 1910 /* The linker assigns locations for varyings and attributes, 1911 * including deprecated builtins (like gl_Color), user-assign 1912 * generic attributes (glBindVertexLocation), and 1913 * user-defined varyings. 1914 * 1915 * FINISHME: We would hit this path for function arguments. Fix! 1916 */ 1917 assert(var->location != -1); 1918 entry = new(mem_ctx) variable_storage(var, 1919 PROGRAM_INPUT, 1920 var->location); 1921 break; 1922 case ir_var_out: 1923 assert(var->location != -1); 1924 entry = new(mem_ctx) variable_storage(var, 1925 PROGRAM_OUTPUT, 1926 var->location + var->index); 1927 break; 1928 case ir_var_system_value: 1929 entry = new(mem_ctx) variable_storage(var, 1930 PROGRAM_SYSTEM_VALUE, 1931 var->location); 1932 break; 1933 case ir_var_auto: 1934 case ir_var_temporary: 1935 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1936 this->next_temp); 1937 this->variables.push_tail(entry); 1938 1939 next_temp += type_size(var->type); 1940 break; 1941 } 1942 1943 if (!entry) { 1944 printf("Failed to make storage for %s\n", var->name); 1945 exit(1); 1946 } 1947 } 1948 1949 this->result = st_src_reg(entry->file, entry->index, var->type); 1950 if (!native_integers) 1951 this->result.type = GLSL_TYPE_FLOAT; 1952} 1953 1954void 1955glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 1956{ 1957 ir_constant *index; 1958 
st_src_reg src; 1959 int element_size = type_size(ir->type); 1960 1961 index = ir->array_index->constant_expression_value(); 1962 1963 ir->array->accept(this); 1964 src = this->result; 1965 1966 if (index) { 1967 src.index += index->value.i[0] * element_size; 1968 } else { 1969 /* Variable index array dereference. It eats the "vec4" of the 1970 * base of the array and an index that offsets the TGSI register 1971 * index. 1972 */ 1973 ir->array_index->accept(this); 1974 1975 st_src_reg index_reg; 1976 1977 if (element_size == 1) { 1978 index_reg = this->result; 1979 } else { 1980 index_reg = get_temp(native_integers ? 1981 glsl_type::int_type : glsl_type::float_type); 1982 1983 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 1984 this->result, st_src_reg_for_type(index_reg.type, element_size)); 1985 } 1986 1987 /* If there was already a relative address register involved, add the 1988 * new and the old together to get the new offset. 1989 */ 1990 if (src.reladdr != NULL) { 1991 st_src_reg accum_reg = get_temp(native_integers ? 1992 glsl_type::int_type : glsl_type::float_type); 1993 1994 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), 1995 index_reg, *src.reladdr); 1996 1997 index_reg = accum_reg; 1998 } 1999 2000 src.reladdr = ralloc(mem_ctx, st_src_reg); 2001 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 2002 } 2003 2004 /* If the type is smaller than a vec4, replicate the last channel out. 
*/ 2005 if (ir->type->is_scalar() || ir->type->is_vector()) 2006 src.swizzle = swizzle_for_size(ir->type->vector_elements); 2007 else 2008 src.swizzle = SWIZZLE_NOOP; 2009 2010 this->result = src; 2011} 2012 2013void 2014glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 2015{ 2016 unsigned int i; 2017 const glsl_type *struct_type = ir->record->type; 2018 int offset = 0; 2019 2020 ir->record->accept(this); 2021 2022 for (i = 0; i < struct_type->length; i++) { 2023 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 2024 break; 2025 offset += type_size(struct_type->fields.structure[i].type); 2026 } 2027 2028 /* If the type is smaller than a vec4, replicate the last channel out. */ 2029 if (ir->type->is_scalar() || ir->type->is_vector()) 2030 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 2031 else 2032 this->result.swizzle = SWIZZLE_NOOP; 2033 2034 this->result.index += offset; 2035} 2036 2037/** 2038 * We want to be careful in assignment setup to hit the actual storage 2039 * instead of potentially using a temporary like we might with the 2040 * ir_dereference handler. 2041 */ 2042static st_dst_reg 2043get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) 2044{ 2045 /* The LHS must be a dereference. If the LHS is a variable indexed array 2046 * access of a vector, it must be separated into a series conditional moves 2047 * before reaching this point (see ir_vec_index_to_cond_assign). 2048 */ 2049 assert(ir->as_dereference()); 2050 ir_dereference_array *deref_array = ir->as_dereference_array(); 2051 if (deref_array) { 2052 assert(!deref_array->array->type->is_vector()); 2053 } 2054 2055 /* Use the rvalue deref handler for the most part. We'll ignore 2056 * swizzles in it and write swizzles using writemask, though. 
2057 */ 2058 ir->accept(v); 2059 return st_dst_reg(v->result); 2060} 2061 2062/** 2063 * Process the condition of a conditional assignment 2064 * 2065 * Examines the condition of a conditional assignment to generate the optimal 2066 * first operand of a \c CMP instruction. If the condition is a relational 2067 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 2068 * used as the source for the \c CMP instruction. Otherwise the comparison 2069 * is processed to a boolean result, and the boolean result is used as the 2070 * operand to the CMP instruction. 2071 */ 2072bool 2073glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 2074{ 2075 ir_rvalue *src_ir = ir; 2076 bool negate = true; 2077 bool switch_order = false; 2078 2079 ir_expression *const expr = ir->as_expression(); 2080 if ((expr != NULL) && (expr->get_num_operands() == 2)) { 2081 bool zero_on_left = false; 2082 2083 if (expr->operands[0]->is_zero()) { 2084 src_ir = expr->operands[1]; 2085 zero_on_left = true; 2086 } else if (expr->operands[1]->is_zero()) { 2087 src_ir = expr->operands[0]; 2088 zero_on_left = false; 2089 } 2090 2091 /* a is - 0 + - 0 + 2092 * (a < 0) T F F ( a < 0) T F F 2093 * (0 < a) F F T (-a < 0) F F T 2094 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) 2095 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) 2096 * (a > 0) F F T (-a < 0) F F T 2097 * (0 > a) T F F ( a < 0) T F F 2098 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 2099 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 2100 * 2101 * Note that exchanging the order of 0 and 'a' in the comparison simply 2102 * means that the value of 'a' should be negated. 
2103 */ 2104 if (src_ir != ir) { 2105 switch (expr->operation) { 2106 case ir_binop_less: 2107 switch_order = false; 2108 negate = zero_on_left; 2109 break; 2110 2111 case ir_binop_greater: 2112 switch_order = false; 2113 negate = !zero_on_left; 2114 break; 2115 2116 case ir_binop_lequal: 2117 switch_order = true; 2118 negate = !zero_on_left; 2119 break; 2120 2121 case ir_binop_gequal: 2122 switch_order = true; 2123 negate = zero_on_left; 2124 break; 2125 2126 default: 2127 /* This isn't the right kind of comparison afterall, so make sure 2128 * the whole condition is visited. 2129 */ 2130 src_ir = ir; 2131 break; 2132 } 2133 } 2134 } 2135 2136 src_ir->accept(this); 2137 2138 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 2139 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 2140 * choose which value TGSI_OPCODE_CMP produces without an extra instruction 2141 * computing the condition. 2142 */ 2143 if (negate) 2144 this->result.negate = ~this->result.negate; 2145 2146 return switch_order; 2147} 2148 2149void 2150glsl_to_tgsi_visitor::visit(ir_assignment *ir) 2151{ 2152 st_dst_reg l; 2153 st_src_reg r; 2154 int i; 2155 2156 ir->rhs->accept(this); 2157 r = this->result; 2158 2159 l = get_assignment_lhs(ir->lhs, this); 2160 2161 /* FINISHME: This should really set to the correct maximal writemask for each 2162 * FINISHME: component written (in the loops below). This case can only 2163 * FINISHME: occur for matrices, arrays, and structures. 2164 */ 2165 if (ir->write_mask == 0) { 2166 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 2167 l.writemask = WRITEMASK_XYZW; 2168 } else if (ir->lhs->type->is_scalar() && 2169 ir->lhs->variable_referenced()->mode == ir_var_out) { 2170 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 2171 * FINISHME: W component of fragment shader output zero, work correctly. 
2172 */ 2173 l.writemask = WRITEMASK_XYZW; 2174 } else { 2175 int swizzles[4]; 2176 int first_enabled_chan = 0; 2177 int rhs_chan = 0; 2178 2179 l.writemask = ir->write_mask; 2180 2181 for (int i = 0; i < 4; i++) { 2182 if (l.writemask & (1 << i)) { 2183 first_enabled_chan = GET_SWZ(r.swizzle, i); 2184 break; 2185 } 2186 } 2187 2188 /* Swizzle a small RHS vector into the channels being written. 2189 * 2190 * glsl ir treats write_mask as dictating how many channels are 2191 * present on the RHS while TGSI treats write_mask as just 2192 * showing which channels of the vec4 RHS get written. 2193 */ 2194 for (int i = 0; i < 4; i++) { 2195 if (l.writemask & (1 << i)) 2196 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 2197 else 2198 swizzles[i] = first_enabled_chan; 2199 } 2200 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 2201 swizzles[2], swizzles[3]); 2202 } 2203 2204 assert(l.file != PROGRAM_UNDEFINED); 2205 assert(r.file != PROGRAM_UNDEFINED); 2206 2207 if (ir->condition) { 2208 const bool switch_order = this->process_move_condition(ir->condition); 2209 st_src_reg condition = this->result; 2210 2211 for (i = 0; i < type_size(ir->lhs->type); i++) { 2212 st_src_reg l_src = st_src_reg(l); 2213 st_src_reg condition_temp = condition; 2214 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); 2215 2216 if (native_integers) { 2217 /* This is necessary because TGSI's CMP instruction expects the 2218 * condition to be a float, and we store booleans as integers. 2219 * If TGSI had a UCMP instruction or similar, this extra 2220 * instruction would not be necessary. 
2221 */ 2222 condition_temp = get_temp(glsl_type::vec4_type); 2223 condition.negate = 0; 2224 emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition); 2225 condition_temp.swizzle = condition.swizzle; 2226 } 2227 2228 if (switch_order) { 2229 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r); 2230 } else { 2231 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src); 2232 } 2233 2234 l.index++; 2235 r.index++; 2236 } 2237 } else if (ir->rhs->as_expression() && 2238 this->instructions.get_tail() && 2239 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 2240 type_size(ir->lhs->type) == 1 && 2241 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { 2242 /* To avoid emitting an extra MOV when assigning an expression to a 2243 * variable, emit the last instruction of the expression again, but 2244 * replace the destination register with the target of the assignment. 2245 * Dead code elimination will remove the original instruction. 2246 */ 2247 glsl_to_tgsi_instruction *inst, *new_inst; 2248 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2249 new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); 2250 new_inst->saturate = inst->saturate; 2251 inst->dead_mask = inst->dst.writemask; 2252 } else { 2253 for (i = 0; i < type_size(ir->lhs->type); i++) { 2254 emit(ir, TGSI_OPCODE_MOV, l, r); 2255 l.index++; 2256 r.index++; 2257 } 2258 } 2259} 2260 2261 2262void 2263glsl_to_tgsi_visitor::visit(ir_constant *ir) 2264{ 2265 st_src_reg src; 2266 GLfloat stack_vals[4] = { 0 }; 2267 gl_constant_value *values = (gl_constant_value *) stack_vals; 2268 GLenum gl_type = GL_NONE; 2269 unsigned int i; 2270 static int in_array = 0; 2271 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 2272 2273 /* Unfortunately, 4 floats is all we can get into 2274 * _mesa_add_typed_unnamed_constant. 
So, make a temp to store an 2275 * aggregate constant and move each constant value into it. If we 2276 * get lucky, copy propagation will eliminate the extra moves. 2277 */ 2278 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 2279 st_src_reg temp_base = get_temp(ir->type); 2280 st_dst_reg temp = st_dst_reg(temp_base); 2281 2282 foreach_iter(exec_list_iterator, iter, ir->components) { 2283 ir_constant *field_value = (ir_constant *)iter.get(); 2284 int size = type_size(field_value->type); 2285 2286 assert(size > 0); 2287 2288 field_value->accept(this); 2289 src = this->result; 2290 2291 for (i = 0; i < (unsigned int)size; i++) { 2292 emit(ir, TGSI_OPCODE_MOV, temp, src); 2293 2294 src.index++; 2295 temp.index++; 2296 } 2297 } 2298 this->result = temp_base; 2299 return; 2300 } 2301 2302 if (ir->type->is_array()) { 2303 st_src_reg temp_base = get_temp(ir->type); 2304 st_dst_reg temp = st_dst_reg(temp_base); 2305 int size = type_size(ir->type->fields.array); 2306 2307 assert(size > 0); 2308 in_array++; 2309 2310 for (i = 0; i < ir->type->length; i++) { 2311 ir->array_elements[i]->accept(this); 2312 src = this->result; 2313 for (int j = 0; j < size; j++) { 2314 emit(ir, TGSI_OPCODE_MOV, temp, src); 2315 2316 src.index++; 2317 temp.index++; 2318 } 2319 } 2320 this->result = temp_base; 2321 in_array--; 2322 return; 2323 } 2324 2325 if (ir->type->is_matrix()) { 2326 st_src_reg mat = get_temp(ir->type); 2327 st_dst_reg mat_column = st_dst_reg(mat); 2328 2329 for (i = 0; i < ir->type->matrix_columns; i++) { 2330 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 2331 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 2332 2333 src = st_src_reg(file, -1, ir->type->base_type); 2334 src.index = add_constant(file, 2335 values, 2336 ir->type->vector_elements, 2337 GL_FLOAT, 2338 &src.swizzle); 2339 emit(ir, TGSI_OPCODE_MOV, mat_column, src); 2340 2341 mat_column.index++; 2342 } 2343 2344 this->result = mat; 2345 return; 2346 } 2347 2348 switch 
(ir->type->base_type) { 2349 case GLSL_TYPE_FLOAT: 2350 gl_type = GL_FLOAT; 2351 for (i = 0; i < ir->type->vector_elements; i++) { 2352 values[i].f = ir->value.f[i]; 2353 } 2354 break; 2355 case GLSL_TYPE_UINT: 2356 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; 2357 for (i = 0; i < ir->type->vector_elements; i++) { 2358 if (native_integers) 2359 values[i].u = ir->value.u[i]; 2360 else 2361 values[i].f = ir->value.u[i]; 2362 } 2363 break; 2364 case GLSL_TYPE_INT: 2365 gl_type = native_integers ? GL_INT : GL_FLOAT; 2366 for (i = 0; i < ir->type->vector_elements; i++) { 2367 if (native_integers) 2368 values[i].i = ir->value.i[i]; 2369 else 2370 values[i].f = ir->value.i[i]; 2371 } 2372 break; 2373 case GLSL_TYPE_BOOL: 2374 gl_type = native_integers ? GL_BOOL : GL_FLOAT; 2375 for (i = 0; i < ir->type->vector_elements; i++) { 2376 if (native_integers) 2377 values[i].u = ir->value.b[i] ? ~0 : 0; 2378 else 2379 values[i].f = ir->value.b[i]; 2380 } 2381 break; 2382 default: 2383 assert(!"Non-float/uint/int/bool constant"); 2384 } 2385 2386 this->result = st_src_reg(file, -1, ir->type); 2387 this->result.index = add_constant(file, 2388 values, 2389 ir->type->vector_elements, 2390 gl_type, 2391 &this->result.swizzle); 2392} 2393 2394function_entry * 2395glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) 2396{ 2397 function_entry *entry; 2398 2399 foreach_iter(exec_list_iterator, iter, this->function_signatures) { 2400 entry = (function_entry *)iter.get(); 2401 2402 if (entry->sig == sig) 2403 return entry; 2404 } 2405 2406 entry = ralloc(mem_ctx, function_entry); 2407 entry->sig = sig; 2408 entry->sig_id = this->next_signature_id++; 2409 entry->bgn_inst = NULL; 2410 2411 /* Allocate storage for all the parameters. 
*/ 2412 foreach_iter(exec_list_iterator, iter, sig->parameters) { 2413 ir_variable *param = (ir_variable *)iter.get(); 2414 variable_storage *storage; 2415 2416 storage = find_variable_storage(param); 2417 assert(!storage); 2418 2419 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, 2420 this->next_temp); 2421 this->variables.push_tail(storage); 2422 2423 this->next_temp += type_size(param->type); 2424 } 2425 2426 if (!sig->return_type->is_void()) { 2427 entry->return_reg = get_temp(sig->return_type); 2428 } else { 2429 entry->return_reg = undef_src; 2430 } 2431 2432 this->function_signatures.push_tail(entry); 2433 return entry; 2434} 2435 2436void 2437glsl_to_tgsi_visitor::visit(ir_call *ir) 2438{ 2439 glsl_to_tgsi_instruction *call_inst; 2440 ir_function_signature *sig = ir->callee; 2441 function_entry *entry = get_function_signature(sig); 2442 int i; 2443 2444 /* Process in parameters. */ 2445 exec_list_iterator sig_iter = sig->parameters.iterator(); 2446 foreach_iter(exec_list_iterator, iter, *ir) { 2447 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2448 ir_variable *param = (ir_variable *)sig_iter.get(); 2449 2450 if (param->mode == ir_var_in || 2451 param->mode == ir_var_inout) { 2452 variable_storage *storage = find_variable_storage(param); 2453 assert(storage); 2454 2455 param_rval->accept(this); 2456 st_src_reg r = this->result; 2457 2458 st_dst_reg l; 2459 l.file = storage->file; 2460 l.index = storage->index; 2461 l.reladdr = NULL; 2462 l.writemask = WRITEMASK_XYZW; 2463 l.cond_mask = COND_TR; 2464 2465 for (i = 0; i < type_size(param->type); i++) { 2466 emit(ir, TGSI_OPCODE_MOV, l, r); 2467 l.index++; 2468 r.index++; 2469 } 2470 } 2471 2472 sig_iter.next(); 2473 } 2474 assert(!sig_iter.has_next()); 2475 2476 /* Emit call instruction */ 2477 call_inst = emit(ir, TGSI_OPCODE_CAL); 2478 call_inst->function = entry; 2479 2480 /* Process out parameters. 
*/ 2481 sig_iter = sig->parameters.iterator(); 2482 foreach_iter(exec_list_iterator, iter, *ir) { 2483 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2484 ir_variable *param = (ir_variable *)sig_iter.get(); 2485 2486 if (param->mode == ir_var_out || 2487 param->mode == ir_var_inout) { 2488 variable_storage *storage = find_variable_storage(param); 2489 assert(storage); 2490 2491 st_src_reg r; 2492 r.file = storage->file; 2493 r.index = storage->index; 2494 r.reladdr = NULL; 2495 r.swizzle = SWIZZLE_NOOP; 2496 r.negate = 0; 2497 2498 param_rval->accept(this); 2499 st_dst_reg l = st_dst_reg(this->result); 2500 2501 for (i = 0; i < type_size(param->type); i++) { 2502 emit(ir, TGSI_OPCODE_MOV, l, r); 2503 l.index++; 2504 r.index++; 2505 } 2506 } 2507 2508 sig_iter.next(); 2509 } 2510 assert(!sig_iter.has_next()); 2511 2512 /* Process return value. */ 2513 this->result = entry->return_reg; 2514} 2515 2516void 2517glsl_to_tgsi_visitor::visit(ir_texture *ir) 2518{ 2519 st_src_reg result_src, coord, lod_info, projector, dx, dy, offset; 2520 st_dst_reg result_dst, coord_dst; 2521 glsl_to_tgsi_instruction *inst = NULL; 2522 unsigned opcode = TGSI_OPCODE_NOP; 2523 2524 if (ir->coordinate) { 2525 ir->coordinate->accept(this); 2526 2527 /* Put our coords in a temp. We'll need to modify them for shadow, 2528 * projection, or LOD, so the only case we'd use it as is is if 2529 * we're doing plain old texturing. The optimization passes on 2530 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 2531 */ 2532 coord = get_temp(glsl_type::vec4_type); 2533 coord_dst = st_dst_reg(coord); 2534 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2535 } 2536 2537 if (ir->projector) { 2538 ir->projector->accept(this); 2539 projector = this->result; 2540 } 2541 2542 /* Storage for our result. Ideally for an assignment we'd be using 2543 * the actual storage for the result here, instead. 
2544 */ 2545 result_src = get_temp(glsl_type::vec4_type); 2546 result_dst = st_dst_reg(result_src); 2547 2548 switch (ir->op) { 2549 case ir_tex: 2550 opcode = TGSI_OPCODE_TEX; 2551 break; 2552 case ir_txb: 2553 opcode = TGSI_OPCODE_TXB; 2554 ir->lod_info.bias->accept(this); 2555 lod_info = this->result; 2556 break; 2557 case ir_txl: 2558 opcode = TGSI_OPCODE_TXL; 2559 ir->lod_info.lod->accept(this); 2560 lod_info = this->result; 2561 break; 2562 case ir_txd: 2563 opcode = TGSI_OPCODE_TXD; 2564 ir->lod_info.grad.dPdx->accept(this); 2565 dx = this->result; 2566 ir->lod_info.grad.dPdy->accept(this); 2567 dy = this->result; 2568 break; 2569 case ir_txs: 2570 opcode = TGSI_OPCODE_TXQ; 2571 ir->lod_info.lod->accept(this); 2572 lod_info = this->result; 2573 break; 2574 case ir_txf: 2575 opcode = TGSI_OPCODE_TXF; 2576 ir->lod_info.lod->accept(this); 2577 lod_info = this->result; 2578 if (ir->offset) { 2579 ir->offset->accept(this); 2580 offset = this->result; 2581 } 2582 break; 2583 } 2584 2585 const glsl_type *sampler_type = ir->sampler->type; 2586 2587 if (ir->projector) { 2588 if (opcode == TGSI_OPCODE_TEX) { 2589 /* Slot the projector in as the last component of the coord. */ 2590 coord_dst.writemask = WRITEMASK_W; 2591 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); 2592 coord_dst.writemask = WRITEMASK_XYZW; 2593 opcode = TGSI_OPCODE_TXP; 2594 } else { 2595 st_src_reg coord_w = coord; 2596 coord_w.swizzle = SWIZZLE_WWWW; 2597 2598 /* For the other TEX opcodes there's no projective version 2599 * since the last slot is taken up by LOD info. Do the 2600 * projective divide now. 2601 */ 2602 coord_dst.writemask = WRITEMASK_W; 2603 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); 2604 2605 /* In the case where we have to project the coordinates "by hand," 2606 * the shadow comparator value must also be projected. 
2607 */ 2608 st_src_reg tmp_src = coord; 2609 if (ir->shadow_comparitor) { 2610 /* Slot the shadow value in as the second to last component of the 2611 * coord. 2612 */ 2613 ir->shadow_comparitor->accept(this); 2614 2615 tmp_src = get_temp(glsl_type::vec4_type); 2616 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 2617 2618 /* Projective division not allowed for array samplers. */ 2619 assert(!sampler_type->sampler_array); 2620 2621 tmp_dst.writemask = WRITEMASK_Z; 2622 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 2623 2624 tmp_dst.writemask = WRITEMASK_XY; 2625 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 2626 } 2627 2628 coord_dst.writemask = WRITEMASK_XYZ; 2629 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 2630 2631 coord_dst.writemask = WRITEMASK_XYZW; 2632 coord.swizzle = SWIZZLE_XYZW; 2633 } 2634 } 2635 2636 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 2637 * comparator was put in the correct place (and projected) by the code, 2638 * above, that handles by-hand projection. 2639 */ 2640 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 2641 /* Slot the shadow value in as the second to last component of the 2642 * coord. 2643 */ 2644 ir->shadow_comparitor->accept(this); 2645 2646 /* XXX This will need to be updated for cubemap array samplers. */ 2647 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2648 sampler_type->sampler_array) || 2649 sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { 2650 coord_dst.writemask = WRITEMASK_W; 2651 } else { 2652 coord_dst.writemask = WRITEMASK_Z; 2653 } 2654 2655 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2656 coord_dst.writemask = WRITEMASK_XYZW; 2657 } 2658 2659 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || 2660 opcode == TGSI_OPCODE_TXF) { 2661 /* TGSI stores LOD or LOD bias in the last channel of the coords. 
*/ 2662 coord_dst.writemask = WRITEMASK_W; 2663 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 2664 coord_dst.writemask = WRITEMASK_XYZW; 2665 } 2666 2667 if (opcode == TGSI_OPCODE_TXD) 2668 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2669 else if (opcode == TGSI_OPCODE_TXQ) 2670 inst = emit(ir, opcode, result_dst, lod_info); 2671 else if (opcode == TGSI_OPCODE_TXF) { 2672 inst = emit(ir, opcode, result_dst, coord); 2673 } else 2674 inst = emit(ir, opcode, result_dst, coord); 2675 2676 if (ir->shadow_comparitor) 2677 inst->tex_shadow = GL_TRUE; 2678 2679 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2680 this->shader_program, 2681 this->prog); 2682 2683 if (ir->offset) { 2684 inst->tex_offset_num_offset = 1; 2685 inst->tex_offsets[0].Index = offset.index; 2686 inst->tex_offsets[0].File = offset.file; 2687 inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0); 2688 inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1); 2689 inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2); 2690 } 2691 2692 switch (sampler_type->sampler_dimensionality) { 2693 case GLSL_SAMPLER_DIM_1D: 2694 inst->tex_target = (sampler_type->sampler_array) 2695 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2696 break; 2697 case GLSL_SAMPLER_DIM_2D: 2698 inst->tex_target = (sampler_type->sampler_array) 2699 ? 
TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2700 break; 2701 case GLSL_SAMPLER_DIM_3D: 2702 inst->tex_target = TEXTURE_3D_INDEX; 2703 break; 2704 case GLSL_SAMPLER_DIM_CUBE: 2705 inst->tex_target = TEXTURE_CUBE_INDEX; 2706 break; 2707 case GLSL_SAMPLER_DIM_RECT: 2708 inst->tex_target = TEXTURE_RECT_INDEX; 2709 break; 2710 case GLSL_SAMPLER_DIM_BUF: 2711 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2712 break; 2713 case GLSL_SAMPLER_DIM_EXTERNAL: 2714 inst->tex_target = TEXTURE_EXTERNAL_INDEX; 2715 break; 2716 default: 2717 assert(!"Should not get here."); 2718 } 2719 2720 this->result = result_src; 2721} 2722 2723void 2724glsl_to_tgsi_visitor::visit(ir_return *ir) 2725{ 2726 if (ir->get_value()) { 2727 st_dst_reg l; 2728 int i; 2729 2730 assert(current_function); 2731 2732 ir->get_value()->accept(this); 2733 st_src_reg r = this->result; 2734 2735 l = st_dst_reg(current_function->return_reg); 2736 2737 for (i = 0; i < type_size(current_function->sig->return_type); i++) { 2738 emit(ir, TGSI_OPCODE_MOV, l, r); 2739 l.index++; 2740 r.index++; 2741 } 2742 } 2743 2744 emit(ir, TGSI_OPCODE_RET); 2745} 2746 2747void 2748glsl_to_tgsi_visitor::visit(ir_discard *ir) 2749{ 2750 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 2751 2752 if (ir->condition) { 2753 ir->condition->accept(this); 2754 this->result.negate = ~this->result.negate; 2755 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); 2756 } else { 2757 emit(ir, TGSI_OPCODE_KILP); 2758 } 2759 2760 fp->UsesKill = GL_TRUE; 2761} 2762 2763void 2764glsl_to_tgsi_visitor::visit(ir_if *ir) 2765{ 2766 glsl_to_tgsi_instruction *cond_inst, *if_inst; 2767 glsl_to_tgsi_instruction *prev_inst; 2768 2769 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2770 2771 ir->condition->accept(this); 2772 assert(this->result.file != PROGRAM_UNDEFINED); 2773 2774 if (this->options->EmitCondCodes) { 2775 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2776 2777 
/* See if we actually generated any instruction for generating 2778 * the condition. If not, then cook up a move to a temp so we 2779 * have something to set cond_update on. 2780 */ 2781 if (cond_inst == prev_inst) { 2782 st_src_reg temp = get_temp(glsl_type::bool_type); 2783 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); 2784 } 2785 cond_inst->cond_update = GL_TRUE; 2786 2787 if_inst = emit(ir->condition, TGSI_OPCODE_IF); 2788 if_inst->dst.cond_mask = COND_NE; 2789 } else { 2790 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); 2791 } 2792 2793 this->instructions.push_tail(if_inst); 2794 2795 visit_exec_list(&ir->then_instructions, this); 2796 2797 if (!ir->else_instructions.is_empty()) { 2798 emit(ir->condition, TGSI_OPCODE_ELSE); 2799 visit_exec_list(&ir->else_instructions, this); 2800 } 2801 2802 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); 2803} 2804 2805glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 2806{ 2807 result.file = PROGRAM_UNDEFINED; 2808 next_temp = 1; 2809 next_signature_id = 1; 2810 num_immediates = 0; 2811 current_function = NULL; 2812 num_address_regs = 0; 2813 indirect_addr_temps = false; 2814 indirect_addr_consts = false; 2815 mem_ctx = ralloc_context(NULL); 2816 ctx = NULL; 2817 prog = NULL; 2818 shader_program = NULL; 2819 options = NULL; 2820} 2821 2822glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 2823{ 2824 ralloc_free(mem_ctx); 2825} 2826 2827extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 2828{ 2829 delete v; 2830} 2831 2832 2833/** 2834 * Count resources used by the given gpu program (number of texture 2835 * samplers, etc). 
2836 */ 2837static void 2838count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2839{ 2840 v->samplers_used = 0; 2841 2842 foreach_iter(exec_list_iterator, iter, v->instructions) { 2843 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2844 2845 if (is_tex_instruction(inst->op)) { 2846 v->samplers_used |= 1 << inst->sampler; 2847 2848 if (inst->tex_shadow) { 2849 prog->ShadowSamplers |= 1 << inst->sampler; 2850 } 2851 } 2852 } 2853 2854 prog->SamplersUsed = v->samplers_used; 2855 2856 if (v->shader_program != NULL) 2857 _mesa_update_shader_textures_used(v->shader_program, prog); 2858} 2859 2860static void 2861set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 2862 struct gl_shader_program *shader_program, 2863 const char *name, const glsl_type *type, 2864 ir_constant *val) 2865{ 2866 if (type->is_record()) { 2867 ir_constant *field_constant; 2868 2869 field_constant = (ir_constant *)val->components.get_head(); 2870 2871 for (unsigned int i = 0; i < type->length; i++) { 2872 const glsl_type *field_type = type->fields.structure[i].type; 2873 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 2874 type->fields.structure[i].name); 2875 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 2876 field_type, field_constant); 2877 field_constant = (ir_constant *)field_constant->next; 2878 } 2879 return; 2880 } 2881 2882 int loc = _mesa_get_uniform_location(ctx, shader_program, name); 2883 2884 if (loc == -1) { 2885 fail_link(shader_program, 2886 "Couldn't find uniform for initializer %s\n", name); 2887 return; 2888 } 2889 2890 for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { 2891 ir_constant *element; 2892 const glsl_type *element_type; 2893 if (type->is_array()) { 2894 element = val->array_elements[i]; 2895 element_type = type->fields.array; 2896 } else { 2897 element = val; 2898 element_type = type; 2899 } 2900 2901 void *values; 2902 2903 if (element_type->base_type == GLSL_TYPE_BOOL) { 2904 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 2905 for (unsigned int j = 0; j < element_type->components(); j++) { 2906 conv[j] = element->value.b[j]; 2907 } 2908 values = (void *)conv; 2909 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 2910 element_type->vector_elements, 2911 1); 2912 } else { 2913 values = &element->value; 2914 } 2915 2916 if (element_type->is_matrix()) { 2917 _mesa_uniform_matrix(ctx, shader_program, 2918 element_type->matrix_columns, 2919 element_type->vector_elements, 2920 loc, 1, GL_FALSE, (GLfloat *)values); 2921 } else { 2922 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 2923 values, element_type->gl_type); 2924 } 2925 2926 loc++; 2927 } 2928} 2929 2930/** 2931 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 2932 * are read from the given src in this instruction 2933 */ 2934static int 2935get_src_arg_mask(st_dst_reg dst, st_src_reg src) 2936{ 2937 int read_mask = 0, comp; 2938 2939 /* Now, given the src swizzle and the written channels, find which 2940 * components are actually read 2941 */ 2942 for (comp = 0; comp < 4; ++comp) { 2943 const unsigned coord = GET_SWZ(src.swizzle, comp); 2944 ASSERT(coord < 4); 2945 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 2946 read_mask |= 1 << coord; 2947 } 2948 2949 return read_mask; 2950} 2951 2952/** 2953 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 2954 * instruction is the first instruction to write to register T0. There are 2955 * several lowering passes done in GLSL IR (e.g. 
branches and 2956 * relative addressing) that create a large number of conditional assignments 2957 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 2958 * 2959 * Here is why this conversion is safe: 2960 * CMP T0, T1 T2 T0 can be expanded to: 2961 * if (T1 < 0.0) 2962 * MOV T0, T2; 2963 * else 2964 * MOV T0, T0; 2965 * 2966 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 2967 * as the original program. If (T1 < 0.0) evaluates to false, executing 2968 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 2969 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 2970 * because any instruction that was going to read from T0 after this was going 2971 * to read a garbage value anyway. 2972 */ 2973void 2974glsl_to_tgsi_visitor::simplify_cmp(void) 2975{ 2976 unsigned *tempWrites; 2977 unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; 2978 2979 tempWrites = new unsigned[MAX_TEMPS]; 2980 if (!tempWrites) { 2981 return; 2982 } 2983 memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS); 2984 memset(outputWrites, 0, sizeof(outputWrites)); 2985 2986 foreach_iter(exec_list_iterator, iter, this->instructions) { 2987 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2988 unsigned prevWriteMask = 0; 2989 2990 /* Give up if we encounter relative addressing or flow control. 
*/ 2991 if (inst->dst.reladdr || 2992 tgsi_get_opcode_info(inst->op)->is_branch || 2993 inst->op == TGSI_OPCODE_BGNSUB || 2994 inst->op == TGSI_OPCODE_CONT || 2995 inst->op == TGSI_OPCODE_END || 2996 inst->op == TGSI_OPCODE_ENDSUB || 2997 inst->op == TGSI_OPCODE_RET) { 2998 break; 2999 } 3000 3001 if (inst->dst.file == PROGRAM_OUTPUT) { 3002 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); 3003 prevWriteMask = outputWrites[inst->dst.index]; 3004 outputWrites[inst->dst.index] |= inst->dst.writemask; 3005 } else if (inst->dst.file == PROGRAM_TEMPORARY) { 3006 assert(inst->dst.index < MAX_TEMPS); 3007 prevWriteMask = tempWrites[inst->dst.index]; 3008 tempWrites[inst->dst.index] |= inst->dst.writemask; 3009 } 3010 3011 /* For a CMP to be considered a conditional write, the destination 3012 * register and source register two must be the same. */ 3013 if (inst->op == TGSI_OPCODE_CMP 3014 && !(inst->dst.writemask & prevWriteMask) 3015 && inst->src[2].file == inst->dst.file 3016 && inst->src[2].index == inst->dst.index 3017 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { 3018 3019 inst->op = TGSI_OPCODE_MOV; 3020 inst->src[0] = inst->src[1]; 3021 } 3022 } 3023 3024 delete [] tempWrites; 3025} 3026 3027/* Replaces all references to a temporary register index with another index. 
*/ 3028void 3029glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) 3030{ 3031 foreach_iter(exec_list_iterator, iter, this->instructions) { 3032 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3033 unsigned j; 3034 3035 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3036 if (inst->src[j].file == PROGRAM_TEMPORARY && 3037 inst->src[j].index == index) { 3038 inst->src[j].index = new_index; 3039 } 3040 } 3041 3042 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3043 inst->dst.index = new_index; 3044 } 3045 } 3046} 3047 3048int 3049glsl_to_tgsi_visitor::get_first_temp_read(int index) 3050{ 3051 int depth = 0; /* loop depth */ 3052 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3053 unsigned i = 0, j; 3054 3055 foreach_iter(exec_list_iterator, iter, this->instructions) { 3056 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3057 3058 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3059 if (inst->src[j].file == PROGRAM_TEMPORARY && 3060 inst->src[j].index == index) { 3061 return (depth == 0) ? i : loop_start; 3062 } 3063 } 3064 3065 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3066 if(depth++ == 0) 3067 loop_start = i; 3068 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3069 if (--depth == 0) 3070 loop_start = -1; 3071 } 3072 assert(depth >= 0); 3073 3074 i++; 3075 } 3076 3077 return -1; 3078} 3079 3080int 3081glsl_to_tgsi_visitor::get_first_temp_write(int index) 3082{ 3083 int depth = 0; /* loop depth */ 3084 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3085 int i = 0; 3086 3087 foreach_iter(exec_list_iterator, iter, this->instructions) { 3088 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3089 3090 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3091 return (depth == 0) ? 
i : loop_start; 3092 } 3093 3094 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3095 if(depth++ == 0) 3096 loop_start = i; 3097 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3098 if (--depth == 0) 3099 loop_start = -1; 3100 } 3101 assert(depth >= 0); 3102 3103 i++; 3104 } 3105 3106 return -1; 3107} 3108 3109int 3110glsl_to_tgsi_visitor::get_last_temp_read(int index) 3111{ 3112 int depth = 0; /* loop depth */ 3113 int last = -1; /* index of last instruction that reads the temporary */ 3114 unsigned i = 0, j; 3115 3116 foreach_iter(exec_list_iterator, iter, this->instructions) { 3117 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3118 3119 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3120 if (inst->src[j].file == PROGRAM_TEMPORARY && 3121 inst->src[j].index == index) { 3122 last = (depth == 0) ? i : -2; 3123 } 3124 } 3125 3126 if (inst->op == TGSI_OPCODE_BGNLOOP) 3127 depth++; 3128 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3129 if (--depth == 0 && last == -2) 3130 last = i; 3131 assert(depth >= 0); 3132 3133 i++; 3134 } 3135 3136 assert(last >= -1); 3137 return last; 3138} 3139 3140int 3141glsl_to_tgsi_visitor::get_last_temp_write(int index) 3142{ 3143 int depth = 0; /* loop depth */ 3144 int last = -1; /* index of last instruction that writes to the temporary */ 3145 int i = 0; 3146 3147 foreach_iter(exec_list_iterator, iter, this->instructions) { 3148 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3149 3150 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) 3151 last = (depth == 0) ? 
i : -2; 3152 3153 if (inst->op == TGSI_OPCODE_BGNLOOP) 3154 depth++; 3155 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3156 if (--depth == 0 && last == -2) 3157 last = i; 3158 assert(depth >= 0); 3159 3160 i++; 3161 } 3162 3163 assert(last >= -1); 3164 return last; 3165} 3166 3167/* 3168 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 3169 * channels for copy propagation and updates following instructions to 3170 * use the original versions. 3171 * 3172 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3173 * will occur. As an example, a TXP production before this pass: 3174 * 3175 * 0: MOV TEMP[1], INPUT[4].xyyy; 3176 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3177 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 3178 * 3179 * and after: 3180 * 3181 * 0: MOV TEMP[1], INPUT[4].xyyy; 3182 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3183 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3184 * 3185 * which allows for dead code elimination on TEMP[1]'s writes. 3186 */ 3187void 3188glsl_to_tgsi_visitor::copy_propagate(void) 3189{ 3190 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 3191 glsl_to_tgsi_instruction *, 3192 this->next_temp * 4); 3193 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3194 int level = 0; 3195 3196 foreach_iter(exec_list_iterator, iter, this->instructions) { 3197 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3198 3199 assert(inst->dst.file != PROGRAM_TEMPORARY 3200 || inst->dst.index < this->next_temp); 3201 3202 /* First, do any copy propagation possible into the src regs. 
*/ 3203 for (int r = 0; r < 3; r++) { 3204 glsl_to_tgsi_instruction *first = NULL; 3205 bool good = true; 3206 int acp_base = inst->src[r].index * 4; 3207 3208 if (inst->src[r].file != PROGRAM_TEMPORARY || 3209 inst->src[r].reladdr) 3210 continue; 3211 3212 /* See if we can find entries in the ACP consisting of MOVs 3213 * from the same src register for all the swizzled channels 3214 * of this src register reference. 3215 */ 3216 for (int i = 0; i < 4; i++) { 3217 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3218 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 3219 3220 if (!copy_chan) { 3221 good = false; 3222 break; 3223 } 3224 3225 assert(acp_level[acp_base + src_chan] <= level); 3226 3227 if (!first) { 3228 first = copy_chan; 3229 } else { 3230 if (first->src[0].file != copy_chan->src[0].file || 3231 first->src[0].index != copy_chan->src[0].index) { 3232 good = false; 3233 break; 3234 } 3235 } 3236 } 3237 3238 if (good) { 3239 /* We've now validated that we can copy-propagate to 3240 * replace this src register reference. Do it. 3241 */ 3242 inst->src[r].file = first->src[0].file; 3243 inst->src[r].index = first->src[0].index; 3244 3245 int swizzle = 0; 3246 for (int i = 0; i < 4; i++) { 3247 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3248 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 3249 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 3250 (3 * i)); 3251 } 3252 inst->src[r].swizzle = swizzle; 3253 } 3254 } 3255 3256 switch (inst->op) { 3257 case TGSI_OPCODE_BGNLOOP: 3258 case TGSI_OPCODE_ENDLOOP: 3259 /* End of a basic block, clear the ACP entirely. */ 3260 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3261 break; 3262 3263 case TGSI_OPCODE_IF: 3264 ++level; 3265 break; 3266 3267 case TGSI_OPCODE_ENDIF: 3268 case TGSI_OPCODE_ELSE: 3269 /* Clear all channels written inside the block from the ACP, but 3270 * leaving those that were not touched. 
3271 */ 3272 for (int r = 0; r < this->next_temp; r++) { 3273 for (int c = 0; c < 4; c++) { 3274 if (!acp[4 * r + c]) 3275 continue; 3276 3277 if (acp_level[4 * r + c] >= level) 3278 acp[4 * r + c] = NULL; 3279 } 3280 } 3281 if (inst->op == TGSI_OPCODE_ENDIF) 3282 --level; 3283 break; 3284 3285 default: 3286 /* Continuing the block, clear any written channels from 3287 * the ACP. 3288 */ 3289 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 3290 /* Any temporary might be written, so no copy propagation 3291 * across this instruction. 3292 */ 3293 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3294 } else if (inst->dst.file == PROGRAM_OUTPUT && 3295 inst->dst.reladdr) { 3296 /* Any output might be written, so no copy propagation 3297 * from outputs across this instruction. 3298 */ 3299 for (int r = 0; r < this->next_temp; r++) { 3300 for (int c = 0; c < 4; c++) { 3301 if (!acp[4 * r + c]) 3302 continue; 3303 3304 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 3305 acp[4 * r + c] = NULL; 3306 } 3307 } 3308 } else if (inst->dst.file == PROGRAM_TEMPORARY || 3309 inst->dst.file == PROGRAM_OUTPUT) { 3310 /* Clear where it's used as dst. */ 3311 if (inst->dst.file == PROGRAM_TEMPORARY) { 3312 for (int c = 0; c < 4; c++) { 3313 if (inst->dst.writemask & (1 << c)) { 3314 acp[4 * inst->dst.index + c] = NULL; 3315 } 3316 } 3317 } 3318 3319 /* Clear where it's used as src. */ 3320 for (int r = 0; r < this->next_temp; r++) { 3321 for (int c = 0; c < 4; c++) { 3322 if (!acp[4 * r + c]) 3323 continue; 3324 3325 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 3326 3327 if (acp[4 * r + c]->src[0].file == inst->dst.file && 3328 acp[4 * r + c]->src[0].index == inst->dst.index && 3329 inst->dst.writemask & (1 << src_chan)) 3330 { 3331 acp[4 * r + c] = NULL; 3332 } 3333 } 3334 } 3335 } 3336 break; 3337 } 3338 3339 /* If this is a copy, add it to the ACP. 
*/ 3340 if (inst->op == TGSI_OPCODE_MOV && 3341 inst->dst.file == PROGRAM_TEMPORARY && 3342 !inst->dst.reladdr && 3343 !inst->saturate && 3344 !inst->src[0].reladdr && 3345 !inst->src[0].negate) { 3346 for (int i = 0; i < 4; i++) { 3347 if (inst->dst.writemask & (1 << i)) { 3348 acp[4 * inst->dst.index + i] = inst; 3349 acp_level[4 * inst->dst.index + i] = level; 3350 } 3351 } 3352 } 3353 } 3354 3355 ralloc_free(acp_level); 3356 ralloc_free(acp); 3357} 3358 3359/* 3360 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. 3361 * 3362 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3363 * will occur. As an example, a TXP production after copy propagation but 3364 * before this pass: 3365 * 3366 * 0: MOV TEMP[1], INPUT[4].xyyy; 3367 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3368 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3369 * 3370 * and after this pass: 3371 * 3372 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3373 * 3374 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) 3375 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them 3376 */ 3377void 3378glsl_to_tgsi_visitor::eliminate_dead_code(void) 3379{ 3380 int i; 3381 3382 for (i=0; i < this->next_temp; i++) { 3383 int last_read = get_last_temp_read(i); 3384 int j = 0; 3385 3386 foreach_iter(exec_list_iterator, iter, this->instructions) { 3387 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3388 3389 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && 3390 j > last_read) 3391 { 3392 iter.remove(); 3393 delete inst; 3394 } 3395 3396 j++; 3397 } 3398 } 3399} 3400 3401/* 3402 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 3403 * code elimination. This is less primitive than eliminate_dead_code(), as it 3404 * is per-channel and can detect consecutive writes without a read between them 3405 * as dead code. 
However, there is some dead code that can be eliminated by 3406 * eliminate_dead_code() but not this function - for example, this function 3407 * cannot eliminate an instruction writing to a register that is never read and 3408 * is the only instruction writing to that register. 3409 * 3410 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3411 * will occur. 3412 */ 3413int 3414glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) 3415{ 3416 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, 3417 glsl_to_tgsi_instruction *, 3418 this->next_temp * 4); 3419 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3420 int level = 0; 3421 int removed = 0; 3422 3423 foreach_iter(exec_list_iterator, iter, this->instructions) { 3424 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3425 3426 assert(inst->dst.file != PROGRAM_TEMPORARY 3427 || inst->dst.index < this->next_temp); 3428 3429 switch (inst->op) { 3430 case TGSI_OPCODE_BGNLOOP: 3431 case TGSI_OPCODE_ENDLOOP: 3432 case TGSI_OPCODE_CONT: 3433 case TGSI_OPCODE_BRK: 3434 /* End of a basic block, clear the write array entirely. 3435 * 3436 * This keeps us from killing dead code when the writes are 3437 * on either side of a loop, even when the register isn't touched 3438 * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit 3439 * dead code of this type, so it shouldn't make a difference as long as 3440 * the dead code elimination pass in the GLSL compiler does its job. 3441 */ 3442 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3443 break; 3444 3445 case TGSI_OPCODE_ENDIF: 3446 case TGSI_OPCODE_ELSE: 3447 /* Promote the recorded level of all channels written inside the 3448 * preceding if or else block to the level above the if/else block. 
3449 */ 3450 for (int r = 0; r < this->next_temp; r++) { 3451 for (int c = 0; c < 4; c++) { 3452 if (!writes[4 * r + c]) 3453 continue; 3454 3455 if (write_level[4 * r + c] == level) 3456 write_level[4 * r + c] = level-1; 3457 } 3458 } 3459 3460 if(inst->op == TGSI_OPCODE_ENDIF) 3461 --level; 3462 3463 break; 3464 3465 case TGSI_OPCODE_IF: 3466 ++level; 3467 /* fallthrough to default case to mark the condition as read */ 3468 3469 default: 3470 /* Continuing the block, clear any channels from the write array that 3471 * are read by this instruction. 3472 */ 3473 for (unsigned i = 0; i < Elements(inst->src); i++) { 3474 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ 3475 /* Any temporary might be read, so no dead code elimination 3476 * across this instruction. 3477 */ 3478 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3479 } else if (inst->src[i].file == PROGRAM_TEMPORARY) { 3480 /* Clear where it's used as src. */ 3481 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); 3482 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); 3483 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); 3484 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); 3485 3486 for (int c = 0; c < 4; c++) { 3487 if (src_chans & (1 << c)) { 3488 writes[4 * inst->src[i].index + c] = NULL; 3489 } 3490 } 3491 } 3492 } 3493 break; 3494 } 3495 3496 /* If this instruction writes to a temporary, add it to the write array. 3497 * If there is already an instruction in the write array for one or more 3498 * of the channels, flag that channel write as dead. 
3499 */ 3500 if (inst->dst.file == PROGRAM_TEMPORARY && 3501 !inst->dst.reladdr && 3502 !inst->saturate) { 3503 for (int c = 0; c < 4; c++) { 3504 if (inst->dst.writemask & (1 << c)) { 3505 if (writes[4 * inst->dst.index + c]) { 3506 if (write_level[4 * inst->dst.index + c] < level) 3507 continue; 3508 else 3509 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); 3510 } 3511 writes[4 * inst->dst.index + c] = inst; 3512 write_level[4 * inst->dst.index + c] = level; 3513 } 3514 } 3515 } 3516 } 3517 3518 /* Anything still in the write array at this point is dead code. */ 3519 for (int r = 0; r < this->next_temp; r++) { 3520 for (int c = 0; c < 4; c++) { 3521 glsl_to_tgsi_instruction *inst = writes[4 * r + c]; 3522 if (inst) 3523 inst->dead_mask |= (1 << c); 3524 } 3525 } 3526 3527 /* Now actually remove the instructions that are completely dead and update 3528 * the writemask of other instructions with dead channels. 3529 */ 3530 foreach_iter(exec_list_iterator, iter, this->instructions) { 3531 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3532 3533 if (!inst->dead_mask || !inst->dst.writemask) 3534 continue; 3535 else if ((inst->dst.writemask & ~inst->dead_mask) == 0) { 3536 iter.remove(); 3537 delete inst; 3538 removed++; 3539 } else 3540 inst->dst.writemask &= ~(inst->dead_mask); 3541 } 3542 3543 ralloc_free(write_level); 3544 ralloc_free(writes); 3545 3546 return removed; 3547} 3548 3549/* Merges temporary registers together where possible to reduce the number of 3550 * registers needed to run a program. 3551 * 3552 * Produces optimal code only after copy propagation and dead code elimination 3553 * have been run. 
*/
/* Implementation note: register lifetimes are approximated by the pair
 * (first write, last read) as reported by get_first_temp_write() and
 * get_last_temp_read(); a negative value in either array marks the register
 * as unused (either originally, or because it has been merged away here).
 */
void
glsl_to_tgsi_visitor::merge_registers(void)
{
   int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
   int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
   int i, j;

   /* Read the indices of the last read and first write to each temp register
    * into an array so that we don't have to traverse the instruction list as
    * much. */
   for (i=0; i < this->next_temp; i++) {
      last_reads[i] = get_last_temp_read(i);
      first_writes[i] = get_first_temp_write(i);
   }

   /* Start looking for registers with non-overlapping usages that can be
    * merged together. */
   for (i=0; i < this->next_temp; i++) {
      /* Don't touch unused registers. */
      if (last_reads[i] < 0 || first_writes[i] < 0) continue;

      for (j=0; j < this->next_temp; j++) {
         /* Don't touch unused registers. */
         if (last_reads[j] < 0 || first_writes[j] < 0) continue;

         /* We can merge the two registers if the first write to j is after or
          * in the same instruction as the last read from i.  Note that the
          * register at index i will always be used earlier or at the same time
          * as the register at index j. */
         if (first_writes[i] <= first_writes[j] &&
             last_reads[i] <= first_writes[j])
         {
            rename_temp_register(j, i); /* Replace all references to j with i.*/

            /* Update the first_writes and last_reads arrays with the new
             * values for the merged register index, and mark the newly unused
             * register index as such. */
            last_reads[i] = last_reads[j];
            first_writes[j] = -1;
            last_reads[j] = -1;
         }
      }
   }

   ralloc_free(last_reads);
   ralloc_free(first_writes);
}

/* Reassign indices to temporary registers by reusing unused indices created
 * by optimization passes.
*/ 3604void 3605glsl_to_tgsi_visitor::renumber_registers(void) 3606{ 3607 int i = 0; 3608 int new_index = 0; 3609 3610 for (i=0; i < this->next_temp; i++) { 3611 if (get_first_temp_read(i) < 0) continue; 3612 if (i != new_index) 3613 rename_temp_register(i, new_index); 3614 new_index++; 3615 } 3616 3617 this->next_temp = new_index; 3618} 3619 3620/** 3621 * Returns a fragment program which implements the current pixel transfer ops. 3622 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. 3623 */ 3624extern "C" void 3625get_pixel_transfer_visitor(struct st_fragment_program *fp, 3626 glsl_to_tgsi_visitor *original, 3627 int scale_and_bias, int pixel_maps) 3628{ 3629 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3630 struct st_context *st = st_context(original->ctx); 3631 struct gl_program *prog = &fp->Base.Base; 3632 struct gl_program_parameter_list *params = _mesa_new_parameter_list(); 3633 st_src_reg coord, src0; 3634 st_dst_reg dst0; 3635 glsl_to_tgsi_instruction *inst; 3636 3637 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ 3638 v->ctx = original->ctx; 3639 v->prog = prog; 3640 v->shader_program = NULL; 3641 v->glsl_version = original->glsl_version; 3642 v->native_integers = original->native_integers; 3643 v->options = original->options; 3644 v->next_temp = original->next_temp; 3645 v->num_address_regs = original->num_address_regs; 3646 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3647 v->indirect_addr_temps = original->indirect_addr_temps; 3648 v->indirect_addr_consts = original->indirect_addr_consts; 3649 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3650 v->num_immediates = original->num_immediates; 3651 3652 /* 3653 * Get initial pixel color from the texture. 
3654 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; 3655 */ 3656 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3657 src0 = v->get_temp(glsl_type::vec4_type); 3658 dst0 = st_dst_reg(src0); 3659 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3660 inst->sampler = 0; 3661 inst->tex_target = TEXTURE_2D_INDEX; 3662 3663 prog->InputsRead |= FRAG_BIT_TEX0; 3664 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ 3665 v->samplers_used |= (1 << 0); 3666 3667 if (scale_and_bias) { 3668 static const gl_state_index scale_state[STATE_LENGTH] = 3669 { STATE_INTERNAL, STATE_PT_SCALE, 3670 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3671 static const gl_state_index bias_state[STATE_LENGTH] = 3672 { STATE_INTERNAL, STATE_PT_BIAS, 3673 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3674 GLint scale_p, bias_p; 3675 st_src_reg scale, bias; 3676 3677 scale_p = _mesa_add_state_reference(params, scale_state); 3678 bias_p = _mesa_add_state_reference(params, bias_state); 3679 3680 /* MAD colorTemp, colorTemp, scale, bias; */ 3681 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); 3682 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); 3683 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); 3684 } 3685 3686 if (pixel_maps) { 3687 st_src_reg temp = v->get_temp(glsl_type::vec4_type); 3688 st_dst_reg temp_dst = st_dst_reg(temp); 3689 3690 assert(st->pixel_xfer.pixelmap_texture); 3691 3692 /* With a little effort, we can do four pixel map look-ups with 3693 * two TEX instructions: 3694 */ 3695 3696 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ 3697 temp_dst.writemask = WRITEMASK_XY; /* write R,G */ 3698 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3699 inst->sampler = 1; 3700 inst->tex_target = TEXTURE_2D_INDEX; 3701 3702 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ 3703 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); 3704 
temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ 3705 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3706 inst->sampler = 1; 3707 inst->tex_target = TEXTURE_2D_INDEX; 3708 3709 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ 3710 v->samplers_used |= (1 << 1); 3711 3712 /* MOV colorTemp, temp; */ 3713 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); 3714 } 3715 3716 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3717 * new visitor. */ 3718 foreach_iter(exec_list_iterator, iter, original->instructions) { 3719 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3720 glsl_to_tgsi_instruction *newinst; 3721 st_src_reg src_regs[3]; 3722 3723 if (inst->dst.file == PROGRAM_OUTPUT) 3724 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3725 3726 for (int i=0; i<3; i++) { 3727 src_regs[i] = inst->src[i]; 3728 if (src_regs[i].file == PROGRAM_INPUT && 3729 src_regs[i].index == FRAG_ATTRIB_COL0) 3730 { 3731 src_regs[i].file = PROGRAM_TEMPORARY; 3732 src_regs[i].index = src0.index; 3733 } 3734 else if (src_regs[i].file == PROGRAM_INPUT) 3735 prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); 3736 } 3737 3738 newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3739 newinst->tex_target = inst->tex_target; 3740 } 3741 3742 /* Make modifications to fragment program info. */ 3743 prog->Parameters = _mesa_combine_parameter_lists(params, 3744 original->prog->Parameters); 3745 _mesa_free_parameter_list(params); 3746 count_resources(v, prog); 3747 fp->glsl_to_tgsi = v; 3748} 3749 3750/** 3751 * Make fragment program for glBitmap: 3752 * Sample the texture and kill the fragment if the bit is 0. 3753 * This program will be combined with the user's fragment program. 3754 * 3755 * Based on make_bitmap_fragment_program in st_cb_bitmap.c. 
3756 */ 3757extern "C" void 3758get_bitmap_visitor(struct st_fragment_program *fp, 3759 glsl_to_tgsi_visitor *original, int samplerIndex) 3760{ 3761 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3762 struct st_context *st = st_context(original->ctx); 3763 struct gl_program *prog = &fp->Base.Base; 3764 st_src_reg coord, src0; 3765 st_dst_reg dst0; 3766 glsl_to_tgsi_instruction *inst; 3767 3768 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ 3769 v->ctx = original->ctx; 3770 v->prog = prog; 3771 v->shader_program = NULL; 3772 v->glsl_version = original->glsl_version; 3773 v->native_integers = original->native_integers; 3774 v->options = original->options; 3775 v->next_temp = original->next_temp; 3776 v->num_address_regs = original->num_address_regs; 3777 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3778 v->indirect_addr_temps = original->indirect_addr_temps; 3779 v->indirect_addr_consts = original->indirect_addr_consts; 3780 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3781 v->num_immediates = original->num_immediates; 3782 3783 /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ 3784 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3785 src0 = v->get_temp(glsl_type::vec4_type); 3786 dst0 = st_dst_reg(src0); 3787 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3788 inst->sampler = samplerIndex; 3789 inst->tex_target = TEXTURE_2D_INDEX; 3790 3791 prog->InputsRead |= FRAG_BIT_TEX0; 3792 prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ 3793 v->samplers_used |= (1 << samplerIndex); 3794 3795 /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ 3796 src0.negate = NEGATE_XYZW; 3797 if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) 3798 src0.swizzle = SWIZZLE_XXXX; 3799 inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); 3800 3801 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3802 * new visitor. 
*/ 3803 foreach_iter(exec_list_iterator, iter, original->instructions) { 3804 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3805 glsl_to_tgsi_instruction *newinst; 3806 st_src_reg src_regs[3]; 3807 3808 if (inst->dst.file == PROGRAM_OUTPUT) 3809 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3810 3811 for (int i=0; i<3; i++) { 3812 src_regs[i] = inst->src[i]; 3813 if (src_regs[i].file == PROGRAM_INPUT) 3814 prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); 3815 } 3816 3817 newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3818 newinst->tex_target = inst->tex_target; 3819 } 3820 3821 /* Make modifications to fragment program info. */ 3822 prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); 3823 count_resources(v, prog); 3824 fp->glsl_to_tgsi = v; 3825} 3826 3827/* ------------------------- TGSI conversion stuff -------------------------- */ 3828struct label { 3829 unsigned branch_target; 3830 unsigned token; 3831}; 3832 3833/** 3834 * Intermediate state used during shader translation. 3835 */ 3836struct st_translate { 3837 struct ureg_program *ureg; 3838 3839 struct ureg_dst temps[MAX_TEMPS]; 3840 struct ureg_src *constants; 3841 struct ureg_src *immediates; 3842 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 3843 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 3844 struct ureg_dst address[1]; 3845 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 3846 struct ureg_src systemValues[SYSTEM_VALUE_MAX]; 3847 3848 const GLuint *inputMapping; 3849 const GLuint *outputMapping; 3850 3851 /* For every instruction that contains a label (eg CALL), keep 3852 * details so that we can go back afterwards and emit the correct 3853 * tgsi instruction number for each label. 
3854 */ 3855 struct label *labels; 3856 unsigned labels_size; 3857 unsigned labels_count; 3858 3859 /* Keep a record of the tgsi instruction number that each mesa 3860 * instruction starts at, will be used to fix up labels after 3861 * translation. 3862 */ 3863 unsigned *insn; 3864 unsigned insn_size; 3865 unsigned insn_count; 3866 3867 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ 3868 3869 boolean error; 3870}; 3871 3872/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ 3873static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { 3874 TGSI_SEMANTIC_FACE, 3875 TGSI_SEMANTIC_VERTEXID, 3876 TGSI_SEMANTIC_INSTANCEID 3877}; 3878 3879/** 3880 * Make note of a branch to a label in the TGSI code. 3881 * After we've emitted all instructions, we'll go over the list 3882 * of labels built here and patch the TGSI code with the actual 3883 * location of each label. 3884 */ 3885static unsigned *get_label(struct st_translate *t, unsigned branch_target) 3886{ 3887 unsigned i; 3888 3889 if (t->labels_count + 1 >= t->labels_size) { 3890 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); 3891 t->labels = (struct label *)realloc(t->labels, 3892 t->labels_size * sizeof(struct label)); 3893 if (t->labels == NULL) { 3894 static unsigned dummy; 3895 t->error = TRUE; 3896 return &dummy; 3897 } 3898 } 3899 3900 i = t->labels_count++; 3901 t->labels[i].branch_target = branch_target; 3902 return &t->labels[i].token; 3903} 3904 3905/** 3906 * Called prior to emitting the TGSI code for each instruction. 3907 * Allocate additional space for instructions if needed. 3908 * Update the insn[] array so the next glsl_to_tgsi_instruction points to 3909 * the next TGSI instruction. 
3910 */ 3911static void set_insn_start(struct st_translate *t, unsigned start) 3912{ 3913 if (t->insn_count + 1 >= t->insn_size) { 3914 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); 3915 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); 3916 if (t->insn == NULL) { 3917 t->error = TRUE; 3918 return; 3919 } 3920 } 3921 3922 t->insn[t->insn_count++] = start; 3923} 3924 3925/** 3926 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 3927 */ 3928static struct ureg_src 3929emit_immediate(struct st_translate *t, 3930 gl_constant_value values[4], 3931 int type, int size) 3932{ 3933 struct ureg_program *ureg = t->ureg; 3934 3935 switch(type) 3936 { 3937 case GL_FLOAT: 3938 return ureg_DECL_immediate(ureg, &values[0].f, size); 3939 case GL_INT: 3940 return ureg_DECL_immediate_int(ureg, &values[0].i, size); 3941 case GL_UNSIGNED_INT: 3942 case GL_BOOL: 3943 return ureg_DECL_immediate_uint(ureg, &values[0].u, size); 3944 default: 3945 assert(!"should not get here - type must be float, int, uint, or bool"); 3946 return ureg_src_undef(); 3947 } 3948} 3949 3950/** 3951 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. 
3952 */ 3953static struct ureg_dst 3954dst_register(struct st_translate *t, 3955 gl_register_file file, 3956 GLuint index) 3957{ 3958 switch(file) { 3959 case PROGRAM_UNDEFINED: 3960 return ureg_dst_undef(); 3961 3962 case PROGRAM_TEMPORARY: 3963 if (ureg_dst_is_undef(t->temps[index])) 3964 t->temps[index] = ureg_DECL_local_temporary(t->ureg); 3965 3966 return t->temps[index]; 3967 3968 case PROGRAM_OUTPUT: 3969 if (t->procType == TGSI_PROCESSOR_VERTEX) 3970 assert(index < VERT_RESULT_MAX); 3971 else if (t->procType == TGSI_PROCESSOR_FRAGMENT) 3972 assert(index < FRAG_RESULT_MAX); 3973 else 3974 assert(index < GEOM_RESULT_MAX); 3975 3976 assert(t->outputMapping[index] < Elements(t->outputs)); 3977 3978 return t->outputs[t->outputMapping[index]]; 3979 3980 case PROGRAM_ADDRESS: 3981 return t->address[index]; 3982 3983 default: 3984 assert(!"unknown dst register file"); 3985 return ureg_dst_undef(); 3986 } 3987} 3988 3989/** 3990 * Map a glsl_to_tgsi src register to a TGSI ureg_src register. 
3991 */ 3992static struct ureg_src 3993src_register(struct st_translate *t, 3994 gl_register_file file, 3995 GLuint index) 3996{ 3997 switch(file) { 3998 case PROGRAM_UNDEFINED: 3999 return ureg_src_undef(); 4000 4001 case PROGRAM_TEMPORARY: 4002 assert(index >= 0); 4003 assert(index < Elements(t->temps)); 4004 if (ureg_dst_is_undef(t->temps[index])) 4005 t->temps[index] = ureg_DECL_local_temporary(t->ureg); 4006 return ureg_src(t->temps[index]); 4007 4008 case PROGRAM_NAMED_PARAM: 4009 case PROGRAM_ENV_PARAM: 4010 case PROGRAM_LOCAL_PARAM: 4011 case PROGRAM_UNIFORM: 4012 assert(index >= 0); 4013 return t->constants[index]; 4014 case PROGRAM_STATE_VAR: 4015 case PROGRAM_CONSTANT: /* ie, immediate */ 4016 if (index < 0) 4017 return ureg_DECL_constant(t->ureg, 0); 4018 else 4019 return t->constants[index]; 4020 4021 case PROGRAM_IMMEDIATE: 4022 return t->immediates[index]; 4023 4024 case PROGRAM_INPUT: 4025 assert(t->inputMapping[index] < Elements(t->inputs)); 4026 return t->inputs[t->inputMapping[index]]; 4027 4028 case PROGRAM_OUTPUT: 4029 assert(t->outputMapping[index] < Elements(t->outputs)); 4030 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 4031 4032 case PROGRAM_ADDRESS: 4033 return ureg_src(t->address[index]); 4034 4035 case PROGRAM_SYSTEM_VALUE: 4036 assert(index < Elements(t->systemValues)); 4037 return t->systemValues[index]; 4038 4039 default: 4040 assert(!"unknown src register file"); 4041 return ureg_src_undef(); 4042 } 4043} 4044 4045/** 4046 * Create a TGSI ureg_dst register from an st_dst_reg. 
4047 */ 4048static struct ureg_dst 4049translate_dst(struct st_translate *t, 4050 const st_dst_reg *dst_reg, 4051 bool saturate, bool clamp_color) 4052{ 4053 struct ureg_dst dst = dst_register(t, 4054 dst_reg->file, 4055 dst_reg->index); 4056 4057 dst = ureg_writemask(dst, dst_reg->writemask); 4058 4059 if (saturate) 4060 dst = ureg_saturate(dst); 4061 else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) { 4062 /* Clamp colors for ARB_color_buffer_float. */ 4063 switch (t->procType) { 4064 case TGSI_PROCESSOR_VERTEX: 4065 /* XXX if the geometry shader is present, this must be done there 4066 * instead of here. */ 4067 if (dst_reg->index == VERT_RESULT_COL0 || 4068 dst_reg->index == VERT_RESULT_COL1 || 4069 dst_reg->index == VERT_RESULT_BFC0 || 4070 dst_reg->index == VERT_RESULT_BFC1) { 4071 dst = ureg_saturate(dst); 4072 } 4073 break; 4074 4075 case TGSI_PROCESSOR_FRAGMENT: 4076 if (dst_reg->index >= FRAG_RESULT_COLOR) { 4077 dst = ureg_saturate(dst); 4078 } 4079 break; 4080 } 4081 } 4082 4083 if (dst_reg->reladdr != NULL) 4084 dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); 4085 4086 return dst; 4087} 4088 4089/** 4090 * Create a TGSI ureg_src register from an st_src_reg. 4091 */ 4092static struct ureg_src 4093translate_src(struct st_translate *t, const st_src_reg *src_reg) 4094{ 4095 struct ureg_src src = src_register(t, src_reg->file, src_reg->index); 4096 4097 src = ureg_swizzle(src, 4098 GET_SWZ(src_reg->swizzle, 0) & 0x3, 4099 GET_SWZ(src_reg->swizzle, 1) & 0x3, 4100 GET_SWZ(src_reg->swizzle, 2) & 0x3, 4101 GET_SWZ(src_reg->swizzle, 3) & 0x3); 4102 4103 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 4104 src = ureg_negate(src); 4105 4106 if (src_reg->reladdr != NULL) { 4107 /* Normally ureg_src_indirect() would be used here, but a stupid compiler 4108 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 4109 * set the bit for src.Negate. So we have to do the operation manually 4110 * here to work around the compiler's problems. 
*/ 4111 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ 4112 struct ureg_src addr = ureg_src(t->address[0]); 4113 src.Indirect = 1; 4114 src.IndirectFile = addr.File; 4115 src.IndirectIndex = addr.Index; 4116 src.IndirectSwizzle = addr.SwizzleX; 4117 4118 if (src_reg->file != PROGRAM_INPUT && 4119 src_reg->file != PROGRAM_OUTPUT) { 4120 /* If src_reg->index was negative, it was set to zero in 4121 * src_register(). Reassign it now. But don't do this 4122 * for input/output regs since they get remapped while 4123 * const buffers don't. 4124 */ 4125 src.Index = src_reg->index; 4126 } 4127 } 4128 4129 return src; 4130} 4131 4132static struct tgsi_texture_offset 4133translate_tex_offset(struct st_translate *t, 4134 const struct tgsi_texture_offset *in_offset) 4135{ 4136 struct tgsi_texture_offset offset; 4137 4138 assert(in_offset->File == PROGRAM_IMMEDIATE); 4139 4140 offset.File = TGSI_FILE_IMMEDIATE; 4141 offset.Index = in_offset->Index; 4142 offset.SwizzleX = in_offset->SwizzleX; 4143 offset.SwizzleY = in_offset->SwizzleY; 4144 offset.SwizzleZ = in_offset->SwizzleZ; 4145 4146 return offset; 4147} 4148 4149static void 4150compile_tgsi_instruction(struct st_translate *t, 4151 const glsl_to_tgsi_instruction *inst, 4152 bool clamp_dst_color_output) 4153{ 4154 struct ureg_program *ureg = t->ureg; 4155 GLuint i; 4156 struct ureg_dst dst[1]; 4157 struct ureg_src src[4]; 4158 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; 4159 4160 unsigned num_dst; 4161 unsigned num_src; 4162 4163 num_dst = num_inst_dst_regs(inst->op); 4164 num_src = num_inst_src_regs(inst->op); 4165 4166 if (num_dst) 4167 dst[0] = translate_dst(t, 4168 &inst->dst, 4169 inst->saturate, 4170 clamp_dst_color_output); 4171 4172 for (i = 0; i < num_src; i++) 4173 src[i] = translate_src(t, &inst->src[i]); 4174 4175 switch(inst->op) { 4176 case TGSI_OPCODE_BGNLOOP: 4177 case TGSI_OPCODE_CAL: 4178 case TGSI_OPCODE_ELSE: 4179 case TGSI_OPCODE_ENDLOOP: 4180 case TGSI_OPCODE_IF: 4181 
assert(num_dst == 0); 4182 ureg_label_insn(ureg, 4183 inst->op, 4184 src, num_src, 4185 get_label(t, 4186 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); 4187 return; 4188 4189 case TGSI_OPCODE_TEX: 4190 case TGSI_OPCODE_TXB: 4191 case TGSI_OPCODE_TXD: 4192 case TGSI_OPCODE_TXL: 4193 case TGSI_OPCODE_TXP: 4194 case TGSI_OPCODE_TXQ: 4195 case TGSI_OPCODE_TXF: 4196 src[num_src++] = t->samplers[inst->sampler]; 4197 for (i = 0; i < inst->tex_offset_num_offset; i++) { 4198 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); 4199 } 4200 ureg_tex_insn(ureg, 4201 inst->op, 4202 dst, num_dst, 4203 st_translate_texture_target(inst->tex_target, inst->tex_shadow), 4204 texoffsets, inst->tex_offset_num_offset, 4205 src, num_src); 4206 return; 4207 4208 case TGSI_OPCODE_SCS: 4209 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); 4210 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); 4211 break; 4212 4213 default: 4214 ureg_insn(ureg, 4215 inst->op, 4216 dst, num_dst, 4217 src, num_src); 4218 break; 4219 } 4220} 4221 4222/** 4223 * Emit the TGSI instructions for inverting and adjusting WPOS. 4224 * This code is unavoidable because it also depends on whether 4225 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 4226 */ 4227static void 4228emit_wpos_adjustment( struct st_translate *t, 4229 const struct gl_program *program, 4230 boolean invert, 4231 GLfloat adjX, GLfloat adjY[2]) 4232{ 4233 struct ureg_program *ureg = t->ureg; 4234 4235 /* Fragment program uses fragment position input. 4236 * Need to replace instances of INPUT[WPOS] with temp T 4237 * where T = INPUT[WPOS] by y is inverted. 4238 */ 4239 static const gl_state_index wposTransformState[STATE_LENGTH] 4240 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 4241 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4242 4243 /* XXX: note we are modifying the incoming shader here! 
Need to 4244 * do this before emitting the constant decls below, or this 4245 * will be missed: 4246 */ 4247 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 4248 wposTransformState); 4249 4250 struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); 4251 struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); 4252 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; 4253 4254 /* First, apply the coordinate shift: */ 4255 if (adjX || adjY[0] || adjY[1]) { 4256 if (adjY[0] != adjY[1]) { 4257 /* Adjust the y coordinate by adjY[1] or adjY[0] respectively 4258 * depending on whether inversion is actually going to be applied 4259 * or not, which is determined by testing against the inversion 4260 * state variable used below, which will be either +1 or -1. 4261 */ 4262 struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg); 4263 4264 ureg_CMP(ureg, adj_temp, 4265 ureg_scalar(wpostrans, invert ? 2 : 0), 4266 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f), 4267 ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f)); 4268 ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp)); 4269 } else { 4270 ureg_ADD(ureg, wpos_temp, wpos_input, 4271 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f)); 4272 } 4273 wpos_input = ureg_src(wpos_temp); 4274 } else { 4275 /* MOV wpos_temp, input[wpos] 4276 */ 4277 ureg_MOV( ureg, wpos_temp, wpos_input ); 4278 } 4279 4280 /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be 4281 * inversion/identity, or the other way around if we're drawing to an FBO. 
4282 */ 4283 if (invert) { 4284 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 4285 */ 4286 ureg_MAD( ureg, 4287 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 4288 wpos_input, 4289 ureg_scalar(wpostrans, 0), 4290 ureg_scalar(wpostrans, 1)); 4291 } else { 4292 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 4293 */ 4294 ureg_MAD( ureg, 4295 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 4296 wpos_input, 4297 ureg_scalar(wpostrans, 2), 4298 ureg_scalar(wpostrans, 3)); 4299 } 4300 4301 /* Use wpos_temp as position input from here on: 4302 */ 4303 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); 4304} 4305 4306 4307/** 4308 * Emit fragment position/ooordinate code. 4309 */ 4310static void 4311emit_wpos(struct st_context *st, 4312 struct st_translate *t, 4313 const struct gl_program *program, 4314 struct ureg_program *ureg) 4315{ 4316 const struct gl_fragment_program *fp = 4317 (const struct gl_fragment_program *) program; 4318 struct pipe_screen *pscreen = st->pipe->screen; 4319 GLfloat adjX = 0.0f; 4320 GLfloat adjY[2] = { 0.0f, 0.0f }; 4321 boolean invert = FALSE; 4322 4323 /* Query the pixel center conventions supported by the pipe driver and set 4324 * adjX, adjY to help out if it cannot handle the requested one internally. 4325 * 4326 * The bias of the y-coordinate depends on whether y-inversion takes place 4327 * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are 4328 * drawing to an FBO (causes additional inversion), and whether the the pipe 4329 * driver origin and the requested origin differ (the latter condition is 4330 * stored in the 'invert' variable). 
4331 * 4332 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): 4333 * 4334 * center shift only: 4335 * i -> h: +0.5 4336 * h -> i: -0.5 4337 * 4338 * inversion only: 4339 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 4340 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 4341 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 4342 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 4343 * 4344 * inversion and center shift: 4345 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 4346 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 4347 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 4348 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 4349 */ 4350 if (fp->OriginUpperLeft) { 4351 /* Fragment shader wants origin in upper-left */ 4352 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 4353 /* the driver supports upper-left origin */ 4354 } 4355 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 4356 /* the driver supports lower-left origin, need to invert Y */ 4357 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4358 invert = TRUE; 4359 } 4360 else 4361 assert(0); 4362 } 4363 else { 4364 /* Fragment shader wants origin in lower-left */ 4365 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 4366 /* the driver supports lower-left origin */ 4367 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 4368 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 4369 /* the driver supports upper-left origin, need to invert Y */ 4370 invert = TRUE; 4371 else 4372 assert(0); 4373 } 4374 4375 if (fp->PixelCenterInteger) { 4376 /* Fragment shader wants pixel center integer */ 4377 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4378 /* the driver supports pixel center integer */ 4379 adjY[1] = 1.0f; 4380 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4381 } 4382 else if (pscreen->get_param(pscreen, 
PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4383 /* the driver supports pixel center half integer, need to bias X,Y */ 4384 adjX = -0.5f; 4385 adjY[0] = -0.5f; 4386 adjY[1] = 0.5f; 4387 } 4388 else 4389 assert(0); 4390 } 4391 else { 4392 /* Fragment shader wants pixel center half integer */ 4393 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 4394 /* the driver supports pixel center half integer */ 4395 } 4396 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 4397 /* the driver supports pixel center integer, need to bias X,Y */ 4398 adjX = adjY[0] = adjY[1] = 0.5f; 4399 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 4400 } 4401 else 4402 assert(0); 4403 } 4404 4405 /* we invert after adjustment so that we avoid the MOV to temporary, 4406 * and reuse the adjustment ADD instead */ 4407 emit_wpos_adjustment(t, program, invert, adjX, adjY); 4408} 4409 4410/** 4411 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 4412 * TGSI uses +1 for front, -1 for back. 4413 * This function converts the TGSI value to the GL value. Simply clamping/ 4414 * saturating the value to [0,1] does the job. 
4415 */ 4416static void 4417emit_face_var(struct st_translate *t) 4418{ 4419 struct ureg_program *ureg = t->ureg; 4420 struct ureg_dst face_temp = ureg_DECL_temporary(ureg); 4421 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; 4422 4423 /* MOV_SAT face_temp, input[face] */ 4424 face_temp = ureg_saturate(face_temp); 4425 ureg_MOV(ureg, face_temp, face_input); 4426 4427 /* Use face_temp as face input from here on: */ 4428 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); 4429} 4430 4431static void 4432emit_edgeflags(struct st_translate *t) 4433{ 4434 struct ureg_program *ureg = t->ureg; 4435 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; 4436 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; 4437 4438 ureg_MOV(ureg, edge_dst, edge_src); 4439} 4440 4441/** 4442 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 4443 * \param program the program to translate 4444 * \param numInputs number of input registers used 4445 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 4446 * input indexes 4447 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 4448 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 4449 * each input 4450 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 4451 * \param numOutputs number of output registers used 4452 * \param outputMapping maps Mesa fragment program outputs to TGSI 4453 * generic outputs 4454 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 4455 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 4456 * each output 4457 * 4458 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 4459 */ 4460extern "C" enum pipe_error 4461st_translate_program( 4462 struct gl_context *ctx, 4463 uint procType, 4464 struct ureg_program *ureg, 4465 glsl_to_tgsi_visitor *program, 4466 const struct gl_program *proginfo, 4467 GLuint 
numInputs, 4468 const GLuint inputMapping[], 4469 const ubyte inputSemanticName[], 4470 const ubyte inputSemanticIndex[], 4471 const GLuint interpMode[], 4472 GLuint numOutputs, 4473 const GLuint outputMapping[], 4474 const ubyte outputSemanticName[], 4475 const ubyte outputSemanticIndex[], 4476 boolean passthrough_edgeflags, 4477 boolean clamp_color) 4478{ 4479 struct st_translate *t; 4480 unsigned i; 4481 enum pipe_error ret = PIPE_OK; 4482 4483 assert(numInputs <= Elements(t->inputs)); 4484 assert(numOutputs <= Elements(t->outputs)); 4485 4486 t = CALLOC_STRUCT(st_translate); 4487 if (!t) { 4488 ret = PIPE_ERROR_OUT_OF_MEMORY; 4489 goto out; 4490 } 4491 4492 memset(t, 0, sizeof *t); 4493 4494 t->procType = procType; 4495 t->inputMapping = inputMapping; 4496 t->outputMapping = outputMapping; 4497 t->ureg = ureg; 4498 4499 if (program->shader_program) { 4500 for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) { 4501 struct gl_uniform_storage *const storage = 4502 &program->shader_program->UniformStorage[i]; 4503 4504 _mesa_uniform_detach_all_driver_storage(storage); 4505 } 4506 } 4507 4508 /* 4509 * Declare input attributes. 4510 */ 4511 if (procType == TGSI_PROCESSOR_FRAGMENT) { 4512 for (i = 0; i < numInputs; i++) { 4513 t->inputs[i] = ureg_DECL_fs_input(ureg, 4514 inputSemanticName[i], 4515 inputSemanticIndex[i], 4516 interpMode[i]); 4517 } 4518 4519 if (proginfo->InputsRead & FRAG_BIT_WPOS) { 4520 /* Must do this after setting up t->inputs, and before 4521 * emitting constant references, below: 4522 */ 4523 emit_wpos(st_context(ctx), t, proginfo, ureg); 4524 } 4525 4526 if (proginfo->InputsRead & FRAG_BIT_FACE) 4527 emit_face_var(t); 4528 4529 /* 4530 * Declare output attributes. 
4531 */ 4532 for (i = 0; i < numOutputs; i++) { 4533 switch (outputSemanticName[i]) { 4534 case TGSI_SEMANTIC_POSITION: 4535 t->outputs[i] = ureg_DECL_output(ureg, 4536 TGSI_SEMANTIC_POSITION, /* Z/Depth */ 4537 outputSemanticIndex[i]); 4538 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); 4539 break; 4540 case TGSI_SEMANTIC_STENCIL: 4541 t->outputs[i] = ureg_DECL_output(ureg, 4542 TGSI_SEMANTIC_STENCIL, /* Stencil */ 4543 outputSemanticIndex[i]); 4544 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); 4545 break; 4546 case TGSI_SEMANTIC_COLOR: 4547 t->outputs[i] = ureg_DECL_output(ureg, 4548 TGSI_SEMANTIC_COLOR, 4549 outputSemanticIndex[i]); 4550 break; 4551 default: 4552 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); 4553 ret = PIPE_ERROR_BAD_INPUT; 4554 goto out; 4555 } 4556 } 4557 } 4558 else if (procType == TGSI_PROCESSOR_GEOMETRY) { 4559 for (i = 0; i < numInputs; i++) { 4560 t->inputs[i] = ureg_DECL_gs_input(ureg, 4561 i, 4562 inputSemanticName[i], 4563 inputSemanticIndex[i]); 4564 } 4565 4566 for (i = 0; i < numOutputs; i++) { 4567 t->outputs[i] = ureg_DECL_output(ureg, 4568 outputSemanticName[i], 4569 outputSemanticIndex[i]); 4570 } 4571 } 4572 else { 4573 assert(procType == TGSI_PROCESSOR_VERTEX); 4574 4575 for (i = 0; i < numInputs; i++) { 4576 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 4577 } 4578 4579 for (i = 0; i < numOutputs; i++) { 4580 if (outputSemanticName[i] == TGSI_SEMANTIC_CLIPDIST) { 4581 int mask = ((1 << (program->num_clip_distances - 4*outputSemanticIndex[i])) - 1) & TGSI_WRITEMASK_XYZW; 4582 t->outputs[i] = ureg_DECL_output_masked(ureg, 4583 outputSemanticName[i], 4584 outputSemanticIndex[i], 4585 mask); 4586 } else { 4587 t->outputs[i] = ureg_DECL_output(ureg, 4588 outputSemanticName[i], 4589 outputSemanticIndex[i]); 4590 } 4591 } 4592 if (passthrough_edgeflags) 4593 emit_edgeflags(t); 4594 } 4595 4596 /* Declare address register. 
4597 */ 4598 if (program->num_address_regs > 0) { 4599 assert(program->num_address_regs == 1); 4600 t->address[0] = ureg_DECL_address(ureg); 4601 } 4602 4603 /* Declare misc input registers 4604 */ 4605 { 4606 GLbitfield sysInputs = proginfo->SystemValuesRead; 4607 unsigned numSys = 0; 4608 for (i = 0; sysInputs; i++) { 4609 if (sysInputs & (1 << i)) { 4610 unsigned semName = mesa_sysval_to_semantic[i]; 4611 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); 4612 numSys++; 4613 sysInputs &= ~(1 << i); 4614 } 4615 } 4616 } 4617 4618 if (program->indirect_addr_temps) { 4619 /* If temps are accessed with indirect addressing, declare temporaries 4620 * in sequential order. Else, we declare them on demand elsewhere. 4621 * (Note: the number of temporaries is equal to program->next_temp) 4622 */ 4623 for (i = 0; i < (unsigned)program->next_temp; i++) { 4624 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 4625 t->temps[i] = ureg_DECL_local_temporary(t->ureg); 4626 } 4627 } 4628 4629 /* Emit constants and uniforms. TGSI uses a single index space for these, 4630 * so we put all the translated regs in t->constants. 4631 */ 4632 if (proginfo->Parameters) { 4633 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); 4634 if (t->constants == NULL) { 4635 ret = PIPE_ERROR_OUT_OF_MEMORY; 4636 goto out; 4637 } 4638 4639 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 4640 switch (proginfo->Parameters->Parameters[i].Type) { 4641 case PROGRAM_ENV_PARAM: 4642 case PROGRAM_LOCAL_PARAM: 4643 case PROGRAM_STATE_VAR: 4644 case PROGRAM_NAMED_PARAM: 4645 case PROGRAM_UNIFORM: 4646 t->constants[i] = ureg_DECL_constant(ureg, i); 4647 break; 4648 4649 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect 4650 * addressing of the const buffer. 4651 * FIXME: Be smarter and recognize param arrays: 4652 * indirect addressing is only valid within the referenced 4653 * array. 
4654 */ 4655 case PROGRAM_CONSTANT: 4656 if (program->indirect_addr_consts) 4657 t->constants[i] = ureg_DECL_constant(ureg, i); 4658 else 4659 t->constants[i] = emit_immediate(t, 4660 proginfo->Parameters->ParameterValues[i], 4661 proginfo->Parameters->Parameters[i].DataType, 4662 4); 4663 break; 4664 default: 4665 break; 4666 } 4667 } 4668 } 4669 4670 /* Emit immediate values. 4671 */ 4672 t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); 4673 if (t->immediates == NULL) { 4674 ret = PIPE_ERROR_OUT_OF_MEMORY; 4675 goto out; 4676 } 4677 i = 0; 4678 foreach_iter(exec_list_iterator, iter, program->immediates) { 4679 immediate_storage *imm = (immediate_storage *)iter.get(); 4680 assert(i < program->num_immediates); 4681 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); 4682 } 4683 assert(i == program->num_immediates); 4684 4685 /* texture samplers */ 4686 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 4687 if (program->samplers_used & (1 << i)) { 4688 t->samplers[i] = ureg_DECL_sampler(ureg, i); 4689 } 4690 } 4691 4692 /* Emit each instruction in turn: 4693 */ 4694 foreach_iter(exec_list_iterator, iter, program->instructions) { 4695 set_insn_start(t, ureg_get_instruction_number(ureg)); 4696 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(), 4697 clamp_color); 4698 } 4699 4700 /* Fix up all emitted labels: 4701 */ 4702 for (i = 0; i < t->labels_count; i++) { 4703 ureg_fixup_label(ureg, t->labels[i].token, 4704 t->insn[t->labels[i].branch_target]); 4705 } 4706 4707 if (program->shader_program) { 4708 /* This has to be done last. Any operation the can cause 4709 * prog->ParameterValues to get reallocated (e.g., anything that adds a 4710 * program constant) has to happen before creating this linkage. 
4711 */ 4712 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4713 if (program->shader_program->_LinkedShaders[i] == NULL) 4714 continue; 4715 4716 _mesa_associate_uniform_storage(ctx, program->shader_program, 4717 program->shader_program->_LinkedShaders[i]->Program->Parameters); 4718 } 4719 } 4720 4721out: 4722 if (t) { 4723 FREE(t->insn); 4724 FREE(t->labels); 4725 FREE(t->constants); 4726 FREE(t->immediates); 4727 4728 if (t->error) { 4729 debug_printf("%s: translate error flag set\n", __FUNCTION__); 4730 } 4731 4732 FREE(t); 4733 } 4734 4735 return ret; 4736} 4737/* ----------------------------- End TGSI code ------------------------------ */ 4738 4739/** 4740 * Convert a shader's GLSL IR into a Mesa gl_program, although without 4741 * generating Mesa IR. 4742 */ 4743static struct gl_program * 4744get_mesa_program(struct gl_context *ctx, 4745 struct gl_shader_program *shader_program, 4746 struct gl_shader *shader, 4747 int num_clip_distances) 4748{ 4749 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); 4750 struct gl_program *prog; 4751 GLenum target; 4752 const char *target_string; 4753 bool progress; 4754 struct gl_shader_compiler_options *options = 4755 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; 4756 4757 switch (shader->Type) { 4758 case GL_VERTEX_SHADER: 4759 target = GL_VERTEX_PROGRAM_ARB; 4760 target_string = "vertex"; 4761 break; 4762 case GL_FRAGMENT_SHADER: 4763 target = GL_FRAGMENT_PROGRAM_ARB; 4764 target_string = "fragment"; 4765 break; 4766 case GL_GEOMETRY_SHADER: 4767 target = GL_GEOMETRY_PROGRAM_NV; 4768 target_string = "geometry"; 4769 break; 4770 default: 4771 assert(!"should not be reached"); 4772 return NULL; 4773 } 4774 4775 validate_ir_tree(shader->ir); 4776 4777 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); 4778 if (!prog) 4779 return NULL; 4780 prog->Parameters = _mesa_new_parameter_list(); 4781 v->ctx = ctx; 4782 v->prog = prog; 4783 v->shader_program = shader_program; 4784 
v->options = options; 4785 v->glsl_version = ctx->Const.GLSLVersion; 4786 v->native_integers = ctx->Const.NativeIntegers; 4787 v->num_clip_distances = num_clip_distances; 4788 4789 _mesa_generate_parameters_list_for_uniforms(shader_program, shader, 4790 prog->Parameters); 4791 4792 /* Remove reads from output registers. */ 4793 lower_output_reads(shader->ir); 4794 4795 /* Emit intermediate IR for main(). */ 4796 visit_exec_list(shader->ir, v); 4797 4798 /* Now emit bodies for any functions that were used. */ 4799 do { 4800 progress = GL_FALSE; 4801 4802 foreach_iter(exec_list_iterator, iter, v->function_signatures) { 4803 function_entry *entry = (function_entry *)iter.get(); 4804 4805 if (!entry->bgn_inst) { 4806 v->current_function = entry; 4807 4808 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); 4809 entry->bgn_inst->function = entry; 4810 4811 visit_exec_list(&entry->sig->body, v); 4812 4813 glsl_to_tgsi_instruction *last; 4814 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); 4815 if (last->op != TGSI_OPCODE_RET) 4816 v->emit(NULL, TGSI_OPCODE_RET); 4817 4818 glsl_to_tgsi_instruction *end; 4819 end = v->emit(NULL, TGSI_OPCODE_ENDSUB); 4820 end->function = entry; 4821 4822 progress = GL_TRUE; 4823 } 4824 } 4825 } while (progress); 4826 4827#if 0 4828 /* Print out some information (for debugging purposes) used by the 4829 * optimization passes. */ 4830 for (i=0; i < v->next_temp; i++) { 4831 int fr = v->get_first_temp_read(i); 4832 int fw = v->get_first_temp_write(i); 4833 int lr = v->get_last_temp_read(i); 4834 int lw = v->get_last_temp_write(i); 4835 4836 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); 4837 assert(fw <= fr); 4838 } 4839#endif 4840 4841 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 
*/ 4842 v->simplify_cmp(); 4843 v->copy_propagate(); 4844 while (v->eliminate_dead_code_advanced()); 4845 4846 /* FIXME: These passes to optimize temporary registers don't work when there 4847 * is indirect addressing of the temporary register space. We need proper 4848 * array support so that we don't have to give up these passes in every 4849 * shader that uses arrays. 4850 */ 4851 if (!v->indirect_addr_temps) { 4852 v->eliminate_dead_code(); 4853 v->merge_registers(); 4854 v->renumber_registers(); 4855 } 4856 4857 /* Write the END instruction. */ 4858 v->emit(NULL, TGSI_OPCODE_END); 4859 4860 if (ctx->Shader.Flags & GLSL_DUMP) { 4861 printf("\n"); 4862 printf("GLSL IR for linked %s program %d:\n", target_string, 4863 shader_program->Name); 4864 _mesa_print_ir(shader->ir, NULL); 4865 printf("\n"); 4866 printf("\n"); 4867 fflush(stdout); 4868 } 4869 4870 prog->Instructions = NULL; 4871 prog->NumInstructions = 0; 4872 4873 do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER); 4874 count_resources(v, prog); 4875 4876 _mesa_reference_program(ctx, &shader->Program, prog); 4877 4878 /* This has to be done last. Any operation the can cause 4879 * prog->ParameterValues to get reallocated (e.g., anything that adds a 4880 * program constant) has to happen before creating this linkage. 
4881 */ 4882 _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); 4883 if (!shader_program->LinkStatus) { 4884 return NULL; 4885 } 4886 4887 struct st_vertex_program *stvp; 4888 struct st_fragment_program *stfp; 4889 struct st_geometry_program *stgp; 4890 4891 switch (shader->Type) { 4892 case GL_VERTEX_SHADER: 4893 stvp = (struct st_vertex_program *)prog; 4894 stvp->glsl_to_tgsi = v; 4895 break; 4896 case GL_FRAGMENT_SHADER: 4897 stfp = (struct st_fragment_program *)prog; 4898 stfp->glsl_to_tgsi = v; 4899 break; 4900 case GL_GEOMETRY_SHADER: 4901 stgp = (struct st_geometry_program *)prog; 4902 stgp->glsl_to_tgsi = v; 4903 break; 4904 default: 4905 assert(!"should not be reached"); 4906 return NULL; 4907 } 4908 4909 return prog; 4910} 4911 4912/** 4913 * Searches through the IR for a declaration of gl_ClipDistance and returns the 4914 * declared size of the gl_ClipDistance array. Returns 0 if gl_ClipDistance is 4915 * not declared in the IR. 4916 */ 4917int get_clip_distance_size(exec_list *ir) 4918{ 4919 foreach_iter (exec_list_iterator, iter, *ir) { 4920 ir_instruction *inst = (ir_instruction *)iter.get(); 4921 ir_variable *var = inst->as_variable(); 4922 if (var == NULL) continue; 4923 if (!strcmp(var->name, "gl_ClipDistance")) { 4924 return var->type->length; 4925 } 4926 } 4927 4928 return 0; 4929} 4930 4931extern "C" { 4932 4933struct gl_shader * 4934st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 4935{ 4936 struct gl_shader *shader; 4937 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || 4938 type == GL_GEOMETRY_SHADER_ARB); 4939 shader = rzalloc(NULL, struct gl_shader); 4940 if (shader) { 4941 shader->Type = type; 4942 shader->Name = name; 4943 _mesa_init_shader(ctx, shader); 4944 } 4945 return shader; 4946} 4947 4948struct gl_shader_program * 4949st_new_shader_program(struct gl_context *ctx, GLuint name) 4950{ 4951 struct gl_shader_program *shProg; 4952 shProg = rzalloc(NULL, struct gl_shader_program); 4953 
if (shProg) { 4954 shProg->Name = name; 4955 _mesa_init_shader_program(ctx, shProg); 4956 } 4957 return shProg; 4958} 4959 4960/** 4961 * Link a shader. 4962 * Called via ctx->Driver.LinkShader() 4963 * This actually involves converting GLSL IR into an intermediate TGSI-like IR 4964 * with code lowering and other optimizations. 4965 */ 4966GLboolean 4967st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 4968{ 4969 int num_clip_distances[MESA_SHADER_TYPES]; 4970 assert(prog->LinkStatus); 4971 4972 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 4973 if (prog->_LinkedShaders[i] == NULL) 4974 continue; 4975 4976 bool progress; 4977 exec_list *ir = prog->_LinkedShaders[i]->ir; 4978 const struct gl_shader_compiler_options *options = 4979 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; 4980 4981 /* We have to determine the length of the gl_ClipDistance array before 4982 * the array is lowered to two vec4s by lower_clip_distance(). 
4983 */ 4984 num_clip_distances[i] = get_clip_distance_size(ir); 4985 4986 do { 4987 unsigned what_to_lower = MOD_TO_FRACT | DIV_TO_MUL_RCP | 4988 EXP_TO_EXP2 | LOG_TO_LOG2; 4989 if (options->EmitNoPow) 4990 what_to_lower |= POW_TO_EXP2; 4991 if (!ctx->Const.NativeIntegers) 4992 what_to_lower |= INT_DIV_TO_MUL_RCP; 4993 4994 progress = false; 4995 4996 /* Lowering */ 4997 do_mat_op_to_vec(ir); 4998 lower_instructions(ir, what_to_lower); 4999 5000 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; 5001 5002 progress = do_common_optimization(ir, true, true, 5003 options->MaxUnrollIterations) 5004 || progress; 5005 5006 progress = lower_quadop_vector(ir, false) || progress; 5007 progress = lower_clip_distance(ir) || progress; 5008 5009 if (options->MaxIfDepth == 0) 5010 progress = lower_discard(ir) || progress; 5011 5012 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; 5013 5014 if (options->EmitNoNoise) 5015 progress = lower_noise(ir) || progress; 5016 5017 /* If there are forms of indirect addressing that the driver 5018 * cannot handle, perform the lowering pass. 
5019 */ 5020 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 5021 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 5022 progress = 5023 lower_variable_index_to_cond_assign(ir, 5024 options->EmitNoIndirectInput, 5025 options->EmitNoIndirectOutput, 5026 options->EmitNoIndirectTemp, 5027 options->EmitNoIndirectUniform) 5028 || progress; 5029 5030 progress = do_vec_index_to_cond_assign(ir) || progress; 5031 } while (progress); 5032 5033 validate_ir_tree(ir); 5034 } 5035 5036 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 5037 struct gl_program *linked_prog; 5038 5039 if (prog->_LinkedShaders[i] == NULL) 5040 continue; 5041 5042 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i], 5043 num_clip_distances[i]); 5044 5045 if (linked_prog) { 5046 static const GLenum targets[] = { 5047 GL_VERTEX_PROGRAM_ARB, 5048 GL_FRAGMENT_PROGRAM_ARB, 5049 GL_GEOMETRY_PROGRAM_NV 5050 }; 5051 5052 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 5053 linked_prog); 5054 if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) { 5055 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 5056 NULL); 5057 _mesa_reference_program(ctx, &linked_prog, NULL); 5058 return GL_FALSE; 5059 } 5060 } 5061 5062 _mesa_reference_program(ctx, &linked_prog, NULL); 5063 } 5064 5065 return GL_TRUE; 5066} 5067 5068void 5069st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, 5070 const GLuint outputMapping[], 5071 struct pipe_stream_output_info *so) 5072{ 5073 unsigned i; 5074 struct gl_transform_feedback_info *info = 5075 &glsl_to_tgsi->shader_program->LinkedTransformFeedback; 5076 5077 for (i = 0; i < info->NumOutputs; i++) { 5078 so->output[i].register_index = 5079 outputMapping[info->Outputs[i].OutputRegister]; 5080 so->output[i].start_component = info->Outputs[i].ComponentOffset; 5081 so->output[i].num_components = info->Outputs[i].NumComponents; 5082 so->output[i].output_buffer = 
info->Outputs[i].OutputBuffer; 5083 so->output[i].dst_offset = info->Outputs[i].DstOffset; 5084 } 5085 5086 for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { 5087 so->stride[i] = info->BufferStride[i]; 5088 } 5089 so->num_outputs = info->NumOutputs; 5090} 5091 5092} /* extern "C" */ 5093