/*
 * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
 * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
 * Copyright © 2010 Intel Corporation
 * Copyright © 2011 Bryan Cain
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file glsl_to_tgsi.cpp
 *
 * Translate GLSL IR to TGSI.
 */

#include "st_glsl_to_tgsi.h"

#include "compiler/glsl/glsl_parser_extras.h"
#include "compiler/glsl/ir_optimization.h"
#include "compiler/glsl/program.h"

#include "main/errors.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "main/shaderapi.h"
#include "main/shaderimage.h"
#include "program/prog_instruction.h"

#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_info.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_format.h"
#include "st_glsl_types.h"
#include "st_nir.h"

#include <algorithm>

#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \
                           (1 << PROGRAM_CONSTANT) |  \
                           (1 << PROGRAM_UNIFORM))

#define MAX_GLSL_TEXTURE_OFFSET 4

class st_src_reg;
class st_dst_reg;

static int swizzle_for_size(int size);

static int swizzle_for_type(const glsl_type *type, int component = 0)
{
   unsigned num_elements = 4;

   if (type) {
      type = type->without_array();
      if (type->is_scalar() || type->is_vector() || type->is_matrix())
         num_elements = type->vector_elements;
   }

   int swizzle = swizzle_for_size(num_elements);
   assert(num_elements + component <= 4);

   swizzle += component * MAKE_SWIZZLE4(1, 1, 1, 1);
   return swizzle;
}
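/*
 * Worked example of the component offset above (illustrative only): a
 * swizzle packs four 3-bit channel selectors, so MAKE_SWIZZLE4(1, 1, 1, 1)
 * is the constant that increments every selector at once.  For a scalar
 * stored at component 2:
 *
 *    swizzle_for_size(1)              -> SWIZZLE_XXXX
 *    + 2 * MAKE_SWIZZLE4(1, 1, 1, 1)  -> SWIZZLE_ZZZZ
 *
 * i.e. each channel selector is shifted from X to Z.
 */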
/**
 * This struct corresponds to TGSI's ureg_src.
 */
class st_src_reg {
public:
   st_src_reg(gl_register_file file, int index, const glsl_type *type,
              int component = 0, unsigned array_id = 0)
   {
      assert(file != PROGRAM_ARRAY || array_id != 0);
      this->file = file;
      this->index = index;
      this->swizzle = swizzle_for_type(type, component);
      this->negate = 0;
      this->abs = 0;
      this->index2D = 0;
      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = array_id;
      this->is_double_vertex_input = false;
   }

   st_src_reg(gl_register_file file, int index, enum glsl_base_type type)
   {
      assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
      this->type = type;
      this->file = file;
      this->index = index;
      this->index2D = 0;
      this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->abs = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = 0;
      this->is_double_vertex_input = false;
   }

   st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int index2D)
   {
      assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
      this->type = type;
      this->file = file;
      this->index = index;
      this->index2D = index2D;
      this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->abs = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = 0;
      this->is_double_vertex_input = false;
   }

   st_src_reg()
   {
      this->type = GLSL_TYPE_ERROR;
      this->file = PROGRAM_UNDEFINED;
      this->index = 0;
      this->index2D = 0;
      this->swizzle = 0;
      this->negate = 0;
      this->abs = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = 0;
      this->is_double_vertex_input = false;
   }

   explicit st_src_reg(st_dst_reg reg);

   int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
   int16_t index2D;
   uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
   int negate:4; /**< NEGATE_XYZW mask from mesa */
   unsigned abs:1;
   enum glsl_base_type type:4; /**< GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
   unsigned has_index2:1;
   gl_register_file file:5; /**< PROGRAM_* from Mesa */
   /*
    * Is this the second half of a double register pair?
    * Currently used for input mapping only.
    */
   unsigned double_reg2:1;
   unsigned is_double_vertex_input:1;
   unsigned array_id:10;

   /** Register index should be offset by the integer in this reg. */
   st_src_reg *reladdr;
   st_src_reg *reladdr2;

   st_src_reg get_abs()
   {
      st_src_reg reg = *this;
      reg.negate = 0;
      reg.abs = 1;
      return reg;
   }
};
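/*
 * A rough sketch of how reladdr is consumed (register names illustrative):
 * when a source carries a reladdr, emit_asm() first loads it into the
 * dedicated address register with ARL/UARL, and the final operand becomes
 * an indirect reference, e.g.
 *
 *    ARL  ADDR[0].x, TEMP[i].x
 *    MOV  TEMP[d], CONST[ADDR[0].x + index]
 *
 * reladdr2/index2D play the same role for the second dimension of
 * two-dimensional files such as constant buffers.
 */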
class st_dst_reg {
public:
   st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type, int index)
   {
      assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
      this->file = file;
      this->index = index;
      this->index2D = 0;
      this->writemask = writemask;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->type = type;
      this->array_id = 0;
   }

   st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type)
   {
      assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
      this->file = file;
      this->index = 0;
      this->index2D = 0;
      this->writemask = writemask;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->type = type;
      this->array_id = 0;
   }

   st_dst_reg()
   {
      this->type = GLSL_TYPE_ERROR;
      this->file = PROGRAM_UNDEFINED;
      this->index = 0;
      this->index2D = 0;
      this->writemask = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->array_id = 0;
   }

   explicit st_dst_reg(st_src_reg reg);

   int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
   int16_t index2D;
   gl_register_file file:5; /**< PROGRAM_* from Mesa */
   unsigned writemask:4; /**< Bitfield of WRITEMASK_[XYZW] */
   enum glsl_base_type type:4; /**< GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
   unsigned has_index2:1;
   unsigned array_id:10;

   /** Register index should be offset by the integer in this reg. */
   st_src_reg *reladdr;
   st_src_reg *reladdr2;
};

st_src_reg::st_src_reg(st_dst_reg reg)
{
   this->type = reg.type;
   this->file = reg.file;
   this->index = reg.index;
   this->swizzle = SWIZZLE_XYZW;
   this->negate = 0;
   this->abs = 0;
   this->reladdr = reg.reladdr;
   this->index2D = reg.index2D;
   this->reladdr2 = reg.reladdr2;
   this->has_index2 = reg.has_index2;
   this->double_reg2 = false;
   this->array_id = reg.array_id;
   this->is_double_vertex_input = false;
}

st_dst_reg::st_dst_reg(st_src_reg reg)
{
   this->type = reg.type;
   this->file = reg.file;
   this->index = reg.index;
   this->writemask = WRITEMASK_XYZW;
   this->reladdr = reg.reladdr;
   this->index2D = reg.index2D;
   this->reladdr2 = reg.reladdr2;
   this->has_index2 = reg.has_index2;
   this->array_id = reg.array_id;
}

class glsl_to_tgsi_instruction : public exec_node {
public:
   DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction)

   st_dst_reg dst[2];
   st_src_reg src[4];
   st_src_reg resource; /**< sampler or buffer register */
   st_src_reg *tex_offsets;

   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;

   unsigned op:8; /**< TGSI opcode */
   unsigned saturate:1;
   unsigned is_64bit_expanded:1;
   unsigned sampler_base:5;
   unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not array */
   unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
   glsl_base_type tex_type:4;
   unsigned tex_shadow:1;
   unsigned image_format:9;
   unsigned tex_offset_num_offset:3;
   unsigned dead_mask:4; /**< Used in dead code elimination */
   unsigned buffer_access:3; /**< buffer access type */

   const struct tgsi_opcode_info *info;
};
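/*
 * A note on the doubled-up fields above (an observation, not new behavior):
 * dst[2] exists because a few TGSI opcodes produce two results at once --
 * e.g. DFRACEXP later in this file writes the significand to dst[0] and the
 * exponent to dst[1] -- while src[4] covers the widest consumers such as
 * MAD-style and compare-and-swap operations.
 */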
class variable_storage : public exec_node {
public:
   variable_storage(ir_variable *var, gl_register_file file, int index,
                    unsigned array_id = 0)
      : file(file), index(index), component(0), var(var), array_id(array_id)
   {
      assert(file != PROGRAM_ARRAY || array_id != 0);
   }

   gl_register_file file;
   int index;

   /* Explicit component location. This is given in terms of the GLSL-style
    * swizzles where each double is a single component, i.e. for 64-bit types
    * it can only be 0 or 1.
    */
   int component;
   ir_variable *var; /* variable that maps to this, if any */
   unsigned array_id;
};

class immediate_storage : public exec_node {
public:
   immediate_storage(gl_constant_value *values, int size32, int type)
   {
      memcpy(this->values, values, size32 * sizeof(gl_constant_value));
      this->size32 = size32;
      this->type = type;
   }

   /* doubles are stored across 2 gl_constant_values */
   gl_constant_value values[4];
   int size32; /**< Number of 32-bit components (1-4) */
   int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
};

static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);

struct inout_decl {
   unsigned mesa_index;
   unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */
   unsigned size;
   unsigned interp_loc;
   unsigned gs_out_streams;
   enum glsl_interp_mode interp;
   enum glsl_base_type base_type;
   ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */
};

static struct inout_decl *
find_inout_array(struct inout_decl *decls, unsigned count, unsigned array_id)
{
   assert(array_id != 0);

   for (unsigned i = 0; i < count; i++) {
      struct inout_decl *decl = &decls[i];

      if (array_id == decl->array_id) {
         return decl;
      }
   }

   return NULL;
}

static enum glsl_base_type
find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id)
{
   if (!array_id)
      return GLSL_TYPE_ERROR;
   struct inout_decl *decl = find_inout_array(decls, count, array_id);
   if (decl)
      return decl->base_type;
   return GLSL_TYPE_ERROR;
}
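/*
 * Example of the GLSL-style usage mask mentioned above (illustrative): for
 * an output declared as dvec2, writing both doubles sets usage_mask to 0x3
 * -- one bit per double -- even though the value occupies all four 32-bit
 * channels of the underlying TGSI register.
 */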
struct rename_reg_pair {
   int old_reg;
   int new_reg;
};

struct glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_linked_shader *shader;
   struct gl_shader_compiler_options *options;

   int next_temp;

   unsigned *array_sizes;
   unsigned max_num_arrays;
   unsigned next_array;

   struct inout_decl inputs[4 * PIPE_MAX_SHADER_INPUTS];
   unsigned num_inputs;
   unsigned num_input_arrays;
   struct inout_decl outputs[4 * PIPE_MAX_SHADER_OUTPUTS];
   unsigned num_outputs;
   unsigned num_output_arrays;

   int num_address_regs;
   uint32_t samplers_used;
   glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
   int buffers_used;
   int images_used;
   int image_targets[PIPE_MAX_SHADER_IMAGES];
   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
   bool indirect_addr_consts;
   int wpos_transform_const;

   int glsl_version;
   bool native_integers;
   bool have_sqrt;
   bool have_fma;
   bool use_shared_memory;

   variable_storage *find_variable_storage(ir_variable *var);

   int add_constant(gl_register_file file, gl_constant_value values[8],
                    int size, int datatype, uint16_t *swizzle_out);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);

   st_src_reg st_src_reg_for_double(double val);
   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   void visit_expression(ir_expression *, st_src_reg *) ATTRIBUTE_NOINLINE;

   void visit_atomic_counter_intrinsic(ir_call *);
   void visit_ssbo_intrinsic(ir_call *);
   void visit_membar_intrinsic(ir_call *);
   void visit_shared_intrinsic(ir_call *);
   void visit_image_intrinsic(ir_call *);

   st_src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of immediate_storage */
   exec_list immediates;
   unsigned num_immediates;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst = undef_dst,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst, st_dst_reg dst1,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   unsigned get_opcode(unsigned op,
                       st_dst_reg dst,
                       st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
                                     st_dst_reg dst,
                                     st_src_reg src0,
                                     st_src_reg src1,
                                     unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   void get_deref_offsets(ir_dereference *ir,
                          unsigned *array_size,
                          unsigned *base,
                          uint16_t *index,
                          st_src_reg *reladdr,
                          bool opaque);
   void calc_deref_offsets(ir_dereference *tail,
                           unsigned *array_elements,
                           uint16_t *index,
                           st_src_reg *indirect,
                           unsigned *location);
   st_src_reg canonicalize_gather_offset(st_src_reg offset);

   bool try_emit_mad(ir_expression *ir,
                     int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
                                 int mul_operand);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void simplify_cmp(void);

   void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
   void get_first_temp_read(int *first_reads);
   void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
   void get_last_temp_write(int *last_writes);

   void copy_propagate(void);
   int eliminate_dead_code(void);

   void merge_two_dsts(void);
   void merge_registers(void);
   void renumber_registers(void);

   void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
                       st_dst_reg *l, st_src_reg *r,
                       st_src_reg *cond, bool cond_swap);

   void *mem_ctx;
};

static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);

static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);

static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args);
   va_end(args);

   prog->data->LinkStatus = GL_FALSE;
}

static int
swizzle_for_size(int size)
{
   static const int size_swizzles[4] = {
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
   };

   assert((size >= 1) && (size <= 4));
   return size_swizzles[size - 1];
}

static bool
is_resource_instruction(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_RESQ:
   case TGSI_OPCODE_LOAD:
   case TGSI_OPCODE_ATOMUADD:
   case TGSI_OPCODE_ATOMXCHG:
   case TGSI_OPCODE_ATOMCAS:
   case TGSI_OPCODE_ATOMAND:
   case TGSI_OPCODE_ATOMOR:
   case TGSI_OPCODE_ATOMXOR:
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMUMAX:
   case TGSI_OPCODE_ATOMIMIN:
   case TGSI_OPCODE_ATOMIMAX:
      return true;
   default:
      return false;
   }
}

static unsigned
num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
{
   return op->info->num_dst;
}

static unsigned
num_inst_src_regs(const glsl_to_tgsi_instruction *op)
{
   return op->info->is_tex || is_resource_instruction(op->op) ?
      op->info->num_src - 1 : op->info->num_src;
}
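/*
 * The "- 1" above reflects how this visitor stores operands: for texture
 * and resource instructions the sampler/buffer operand is kept in
 * glsl_to_tgsi_instruction::resource rather than in src[], so one fewer
 * src[] slot is in use than tgsi_info reports for the TGSI opcode.  This
 * is an observation about the code above, not a TGSI requirement.
 */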
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst, st_dst_reg dst1,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
   int num_reladdr = 0, i, j;
   bool dst_is_64bit[2];

   op = get_opcode(op, dst, src0, src1);

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL || dst.reladdr2;
   num_reladdr += dst1.reladdr != NULL || dst1.reladdr2;
   num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
   num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
   num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
   num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;

   reladdr_to_temp(ir, &src3, &num_reladdr);
   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr || dst.reladdr2) {
      if (dst.reladdr)
         emit_arl(ir, address_reg, *dst.reladdr);
      if (dst.reladdr2)
         emit_arl(ir, address_reg2, *dst.reladdr2);
      num_reladdr--;
   }
   if (dst1.reladdr) {
      emit_arl(ir, address_reg, *dst1.reladdr);
      num_reladdr--;
   }
   assert(num_reladdr == 0);

   /* inst->op has only 8 bits. */
   STATIC_ASSERT(TGSI_OPCODE_LAST <= 255);

   inst->op = op;
   inst->info = tgsi_get_opcode_info(op);
   inst->dst[0] = dst;
   inst->dst[1] = dst1;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->src[3] = src3;
   inst->is_64bit_expanded = false;
   inst->ir = ir;
   inst->dead_mask = 0;
   inst->tex_offsets = NULL;
   inst->tex_offset_num_offset = 0;
   inst->saturate = 0;
   inst->tex_shadow = 0;
   /* default to float, for paths where this is not initialized
    * (since 0==UINT which is likely wrong):
    */
   inst->tex_type = GLSL_TYPE_FLOAT;

   /* Update indirect addressing status used by TGSI */
   if (dst.reladdr || dst.reladdr2) {
      switch(dst.file) {
      case PROGRAM_STATE_VAR:
      case PROGRAM_CONSTANT:
      case PROGRAM_UNIFORM:
         this->indirect_addr_consts = true;
         break;
      case PROGRAM_IMMEDIATE:
         assert(!"immediates should not have indirect addressing");
         break;
      default:
         break;
      }
   }
   else {
      for (i = 0; i < 4; i++) {
         if(inst->src[i].reladdr) {
            switch(inst->src[i].file) {
            case PROGRAM_STATE_VAR:
            case PROGRAM_CONSTANT:
            case PROGRAM_UNIFORM:
               this->indirect_addr_consts = true;
               break;
            case PROGRAM_IMMEDIATE:
               assert(!"immediates should not have indirect addressing");
               break;
            default:
               break;
            }
         }
      }
   }

   /*
    * This section contains the double processing.
    * GLSL just represents doubles as single channel values,
    * however most HW and TGSI represent doubles as pairs of register channels.
    *
    * So we have to fix up the destination writemask/index and src swizzle/indexes.
    * Dest writemasks need to translate from a single-channel write mask
    * to a dual-channel writemask, but also need to modify the index,
    * if we are touching the Z,W fields in the pre-translated writemask.
    *
    * Src channels have similar index modifications along with swizzle
    * changes so we pick the XY, ZW pairs from the correct index.
    *
    * GLSL [0].x -> TGSI [0].xy
    * GLSL [0].y -> TGSI [0].zw
    * GLSL [0].z -> TGSI [1].xy
    * GLSL [0].w -> TGSI [1].zw
    */
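   /*
    * As a rough illustration (register names arbitrary), a dvec4 copy with
    * a full GLSL writemask ends up expanded by the loop below into four
    * dual-channel instructions:
    *
    *    MOV TEMP[d+0].xy, TEMP[s+0].xyxy    (GLSL .x)
    *    MOV TEMP[d+0].zw, TEMP[s+0].zwzw    (GLSL .y)
    *    MOV TEMP[d+1].xy, TEMP[s+1].xyxy    (GLSL .z)
    *    MOV TEMP[d+1].zw, TEMP[s+1].zwzw    (GLSL .w)
    */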
   for (j = 0; j < 2; j++) {
      dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type);
      if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) {
         enum glsl_base_type type = find_array_type(this->outputs, this->num_outputs, inst->dst[j].array_id);
         if (glsl_base_type_is_64bit(type))
            dst_is_64bit[j] = true;
      }
   }

   if (dst_is_64bit[0] || dst_is_64bit[1] ||
       glsl_base_type_is_64bit(inst->src[0].type)) {
      glsl_to_tgsi_instruction *dinst = NULL;
      int initial_src_swz[4], initial_src_idx[4];
      int initial_dst_idx[2], initial_dst_writemask[2];
      /* select the writemask for dst0 or dst1 */
      unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;

      /* copy out the writemask, index and swizzles for all src/dsts. */
      for (j = 0; j < 2; j++) {
         initial_dst_writemask[j] = inst->dst[j].writemask;
         initial_dst_idx[j] = inst->dst[j].index;
      }

      for (j = 0; j < 4; j++) {
         initial_src_swz[j] = inst->src[j].swizzle;
         initial_src_idx[j] = inst->src[j].index;
      }

      /*
       * scan all the components in the dst writemask and
       * generate an instruction for each of them if required.
       */
      st_src_reg addr;
      while (writemask) {

         int i = u_bit_scan(&writemask);

         /* before emitting the instruction, see if we have to adjust the
          * load / store address */
         if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || inst->op == TGSI_OPCODE_STORE) &&
             addr.file == PROGRAM_UNDEFINED) {
            /* We have to advance the buffer address by 16 */
            addr = get_temp(glsl_type::uint_type);
            emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr),
                     inst->src[0], st_src_reg_for_int(16));
         }

         /* the first time through, reuse the original instruction */
         if (dinst == NULL) {
            dinst = inst;
         } else {
            /* create a new instruction for each subsequent component */
            dinst = new(mem_ctx) glsl_to_tgsi_instruction();
            *dinst = *inst;
            dinst->next = NULL;
            dinst->prev = NULL;
         }
         this->instructions.push_tail(dinst);
         dinst->is_64bit_expanded = true;

         /* modify the destination if we are splitting */
         for (j = 0; j < 2; j++) {
            if (dst_is_64bit[j]) {
               dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
               dinst->dst[j].index = initial_dst_idx[j];
               if (i > 1) {
                  if (dinst->op == TGSI_OPCODE_LOAD || dinst->op == TGSI_OPCODE_STORE)
                     dinst->src[0] = addr;
                  if (dinst->op != TGSI_OPCODE_STORE)
                     dinst->dst[j].index++;
               }
            } else {
               /* if we aren't writing to a double, just get the bit of the
                * initial writemask for this channel */
               dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
            }
         }

         /* modify the src registers */
         for (j = 0; j < 4; j++) {
            int swz = GET_SWZ(initial_src_swz[j], i);

            if (glsl_base_type_is_64bit(dinst->src[j].type)) {
               dinst->src[j].index = initial_src_idx[j];
               if (swz > 1) {
                  dinst->src[j].double_reg2 = true;
                  dinst->src[j].index++;
               }

               if (swz & 1)
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
               else
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);

            } else {
               /* some opcodes are special-cased in what they use as sources:
                * [FUI]2D/[UI]2I64 take a float/[u]int src0, and DLDEXP takes
                * an integer src1.
                */
               if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || op == TGSI_OPCODE_I2D ||
                   op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 ||
                   op == TGSI_OPCODE_DLDEXP ||
                   (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) {
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
               }
            }
         }
      }
      inst = dinst;
   } else {
      this->instructions.push_tail(inst);
   }


   return inst;
}

glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3);
}

/**
 * Determines whether to use an integer, unsigned integer, or float opcode
 * based on the operands and input opcode, then returns the resulting opcode.
 */
unsigned
glsl_to_tgsi_visitor::get_opcode(unsigned op,
                                 st_dst_reg dst,
                                 st_src_reg src0, st_src_reg src1)
{
   enum glsl_base_type type = GLSL_TYPE_FLOAT;

   if (op == TGSI_OPCODE_MOV)
      return op;

   assert(src0.type != GLSL_TYPE_ARRAY);
   assert(src0.type != GLSL_TYPE_STRUCT);
   assert(src1.type != GLSL_TYPE_ARRAY);
   assert(src1.type != GLSL_TYPE_STRUCT);

   if (is_resource_instruction(op))
      type = src1.type;
   else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
      type = GLSL_TYPE_DOUBLE;
   else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
      type = GLSL_TYPE_FLOAT;
   else if (native_integers)
      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
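/*
 * A sketch of how the macros below behave (expansion shown for one case):
 * case3fid(ADD, UADD, DADD) turns TGSI_OPCODE_ADD into
 *
 *    DADD  if the selected type is double,
 *    UADD  if it is int or uint,
 *    ADD   otherwise (float, or non-native-integer bool).
 *
 * The "comp" variant additionally falls back to the original float-only
 * comparison opcode when native integers are unavailable.
 */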
#define case5(c, f, i, u, d) \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_DOUBLE) \
         op = TGSI_OPCODE_##d; \
      else if (type == GLSL_TYPE_INT) \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else \
         op = TGSI_OPCODE_##f; \
      break;

#define case4(c, f, i, u) \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_INT) \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else \
         op = TGSI_OPCODE_##f; \
      break;

#define case3(f, i, u)  case4(f, f, i, u)
#define case4d(f, i, u, d)  case5(f, f, i, u, d)
#define case3fid(f, i, d) case5(f, f, i, i, d)
#define case2fi(f, i)   case4(f, f, i, i)
#define case2iu(i, u)   case4(i, LAST, i, u)

#define casecomp(c, f, i, u, d) \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_DOUBLE) \
         op = TGSI_OPCODE_##d; \
      else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else if (native_integers) \
         op = TGSI_OPCODE_##f; \
      else \
         op = TGSI_OPCODE_##c; \
      break;

   switch(op) {
      case3fid(ADD, UADD, DADD);
      case3fid(MUL, UMUL, DMUL);
      case3fid(MAD, UMAD, DMAD);
      case3fid(FMA, UMAD, DFMA);
      case4d(DIV, IDIV, UDIV, DDIV);
      case4d(MAX, IMAX, UMAX, DMAX);
      case4d(MIN, IMIN, UMIN, DMIN);
      case2iu(MOD, UMOD);

      casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
      casecomp(SNE, FSNE, USNE, USNE, DSNE);
      casecomp(SGE, FSGE, ISGE, USGE, DSGE);
      casecomp(SLT, FSLT, ISLT, USLT, DSLT);

      case2iu(ISHR, USHR);

      case3fid(SSG, ISSG, DSSG);

      case2iu(IBFE, UBFE);
      case2iu(IMSB, UMSB);
      case2iu(IMUL_HI, UMUL_HI);

      case3fid(SQRT, SQRT, DSQRT);

      case3fid(RCP, RCP, DRCP);
      case3fid(RSQ, RSQ, DRSQ);

      case3fid(FRC, FRC, DFRAC);
      case3fid(TRUNC, TRUNC, DTRUNC);
      case3fid(CEIL, CEIL, DCEIL);
      case3fid(FLR, FLR, DFLR);
      case3fid(ROUND, ROUND, DROUND);

      case2iu(ATOMIMAX, ATOMUMAX);
      case2iu(ATOMIMIN, ATOMUMIN);

      default: break;
   }

   assert(op != TGSI_OPCODE_LAST);
   return op;
}

glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
                              st_dst_reg dst, st_src_reg src0, st_src_reg src1,
                              unsigned elements)
{
   static const unsigned dot_opcodes[] = {
      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
   };

   return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1);
}

/**
 * Emits TGSI scalar opcodes to produce unique answers across channels.
 *
 * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
 * channel determines the result across all channels.  So to do a vec4
 * of this operation, we want to emit a scalar per source channel used
 * to produce dest channels.
 */
void
glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst,
                                  st_src_reg orig_src0, st_src_reg orig_src1)
{
   int i, j;
   int done_mask = ~dst.writemask;

   /* TGSI RCP is a scalar operation splatting results to all channels,
    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
    * dst channels.
    */
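   /* For instance (an illustration of the merging below, not extra output):
    * with dst.writemask = xyzw and a src swizzle of .xxyy, channels x/y
    * share src.x and z/w share src.y, so only two instructions are emitted:
    *
    *    RCP dst.xy, src.xxxx
    *    RCP dst.zw, src.yyyy
    */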
   for (i = 0; i < 4; i++) {
      GLuint this_mask = (1 << i);
      st_src_reg src0 = orig_src0;
      st_src_reg src1 = orig_src1;

      if (done_mask & this_mask)
         continue;

      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
      for (j = i + 1; j < 4; j++) {
         /* If there is another enabled component in the destination that is
          * derived from the same inputs, generate its value on this pass as
          * well.
          */
         if (!(done_mask & (1 << j)) &&
             GET_SWZ(src0.swizzle, j) == src0_swiz &&
             GET_SWZ(src1.swizzle, j) == src1_swiz) {
            this_mask |= (1 << j);
         }
      }
      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
                                   src0_swiz, src0_swiz);
      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
                                   src1_swiz, src1_swiz);

      dst.writemask = this_mask;
      emit_asm(ir, op, dst, src0, src1);
      done_mask |= this_mask;
   }
}

void
glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst, st_src_reg src0)
{
   st_src_reg undef = undef_src;

   undef.swizzle = SWIZZLE_XXXX;

   emit_scalar(ir, op, dst, src0, undef);
}

void
glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
                               st_dst_reg dst, st_src_reg src0)
{
   int op = TGSI_OPCODE_ARL;

   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
      op = TGSI_OPCODE_UARL;

   assert(dst.file == PROGRAM_ADDRESS);
   if (dst.index >= this->num_address_regs)
      this->num_address_regs = dst.index + 1;

   emit_asm(NULL, op, dst, src0);
}

int
glsl_to_tgsi_visitor::add_constant(gl_register_file file,
                                   gl_constant_value values[8], int size, int datatype,
                                   uint16_t *swizzle_out)
{
   if (file == PROGRAM_CONSTANT) {
      GLuint swizzle = swizzle_out ? *swizzle_out : 0;
      int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
                                                    size, datatype, &swizzle);
      if (swizzle_out)
         *swizzle_out = swizzle;
      return result;
   }

   assert(file == PROGRAM_IMMEDIATE);

   int index = 0;
   immediate_storage *entry;
   int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
   int i;

   /* Search immediate storage to see if we already have an identical
    * immediate that we can use instead of adding a duplicate entry.
    */
   foreach_in_list(immediate_storage, entry, &this->immediates) {
      immediate_storage *tmp = entry;

      for (i = 0; i * 4 < size32; i++) {
         int slot_size = MIN2(size32 - (i * 4), 4);
         if (tmp->type != datatype || tmp->size32 != slot_size)
            break;
         if (memcmp(tmp->values, &values[i * 4],
                    slot_size * sizeof(gl_constant_value)))
            break;

         /* Everything matches, keep going until the full size is matched */
         tmp = (immediate_storage *)tmp->next;
      }

      /* The full value matched */
      if (i * 4 >= size32)
         return index;

      index++;
   }

   for (i = 0; i * 4 < size32; i++) {
      int slot_size = MIN2(size32 - (i * 4), 4);
      /* Add this immediate to the list. */
      entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
      this->immediates.push_tail(entry);
      this->num_immediates++;
   }
   return index;
}
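/*
 * Immediate layout in 32-bit slots, as handled above (illustrative): each
 * immediate_storage holds up to four 32-bit components, and a double takes
 * two of them.  So a single double constant has size32 == 2 and, as
 * st_src_reg_for_double below encodes, reads its two halves through an
 * .xyxy swizzle; a dvec3 or dvec4 would span two immediate slots.
 */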
st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
   union gl_constant_value uval;

   uval.f = val;
   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);

   return src;
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE);
   union gl_constant_value uval[2];

   memcpy(uval, &val, sizeof(uval));
   src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
   src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
   return src;
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
   union gl_constant_value uval;

   assert(native_integers);

   uval.i = val;
   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);

   return src;
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val)
{
   if (native_integers)
      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
                                       st_src_reg_for_int(val);
   else
      return st_src_reg_for_float(val);
}

static int
attrib_type_size(const struct glsl_type *type, bool is_vs_input)
{
   return st_glsl_attrib_type_size(type, is_vs_input);
}

static int
type_size(const struct glsl_type *type)
{
   return st_glsl_type_size(type);
}

/**
 * If the given GLSL type is an array or matrix or a structure containing
 * an array/matrix member, return true.  Else return false.
 *
 * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY
 * or PROGRAM_ARRAY) should be used for variables of this type.  Anytime
 * we have an array that might be indexed with a variable, we need to use
 * the latter storage type.
 */
static bool
type_has_array_or_matrix(const glsl_type *type)
{
   if (type->is_array() || type->is_matrix())
      return true;

   if (type->is_record()) {
      for (unsigned i = 0; i < type->length; i++) {
         if (type_has_array_or_matrix(type->fields.structure[i].type)) {
            return true;
         }
      }
   }

   return false;
}
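/*
 * A quick illustration of the split this predicate drives (the types are
 * just examples): a float or vec4 temporary lands in PROGRAM_TEMPORARY,
 * while a float[8], a mat3, or a struct containing either is allocated as
 * a PROGRAM_ARRAY by get_temp() below, so that it can still be addressed
 * indirectly.
 */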

/**
 * In the initial pass of codegen, we assign temporary numbers to
 * intermediate results.  (not SSA -- variable assignments will reuse
 * storage).
 */
st_src_reg
glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
{
   st_src_reg src;

   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
   src.reladdr = NULL;
   src.negate = 0;
   src.abs = 0;

   if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
      if (next_array >= max_num_arrays) {
         max_num_arrays += 32;
         array_sizes = (unsigned*)
            realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays);
      }

      src.file = PROGRAM_ARRAY;
      src.index = 0;
      src.array_id = next_array + 1;
      array_sizes[next_array] = type_size(type);
      ++next_array;

   } else {
      src.file = PROGRAM_TEMPORARY;
      src.index = next_temp;
      next_temp += type_size(type);
   }

   if (type->is_array() || type->is_record()) {
      src.swizzle = SWIZZLE_NOOP;
   } else {
      src.swizzle = swizzle_for_size(type->vector_elements);
   }

   return src;
}

variable_storage *
glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
{
   foreach_in_list(variable_storage, entry, &this->variables) {
      if (entry->var == var)
         return entry;
   }

   return NULL;
}

void
glsl_to_tgsi_visitor::visit(ir_variable *ir)
{
   if (strcmp(ir->name, "gl_FragCoord") == 0) {
      this->prog->OriginUpperLeft = ir->data.origin_upper_left;
      this->prog->PixelCenterInteger = ir->data.pixel_center_integer;
   }

   if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
      unsigned int i;
      const ir_state_slot *const slots = ir->get_state_slots();
      assert(slots != NULL);

      /* Check if this statevar's setup in the STATE file exactly
       * matches how we'll want to reference it as a
       * struct/array/whatever.  If not, then we need to move it into
       * temporary storage and hope that it'll get copy-propagated
       * out.
       */
      for (i = 0; i < ir->get_num_state_slots(); i++) {
         if (slots[i].swizzle != SWIZZLE_XYZW) {
            break;
         }
      }

      variable_storage *storage;
      st_dst_reg dst;
      if (i == ir->get_num_state_slots()) {
         /* We'll set the index later. */
         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
         this->variables.push_tail(storage);

         dst = undef_dst;
      } else {
         /* The variable_storage constructor allocates slots based on the size
          * of the type.  However, this had better match the number of state
          * elements that we're going to copy into the new temporary.
          */
         assert((int) ir->get_num_state_slots() == type_size(ir->type));

         dst = st_dst_reg(get_temp(ir->type));

         storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index,
                                                 dst.array_id);

         this->variables.push_tail(storage);
      }


      for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
         int index = _mesa_add_state_reference(this->prog->Parameters,
                                               (gl_state_index *)slots[i].tokens);

         if (storage->file == PROGRAM_STATE_VAR) {
            if (storage->index == -1) {
               storage->index = index;
            } else {
               assert(index == storage->index + (int)i);
            }
         } else {
            /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
             * the data being moved since MOV does not care about the type of
             * data it is moving, and we don't want to declare registers with
             * array or struct types.
             */
1350 */ 1351 st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT); 1352 src.swizzle = slots[i].swizzle; 1353 emit_asm(ir, TGSI_OPCODE_MOV, dst, src); 1354 /* even a float takes up a whole vec4 reg in a struct/array. */ 1355 dst.index++; 1356 } 1357 } 1358 1359 if (storage->file == PROGRAM_TEMPORARY && 1360 dst.index != storage->index + (int) ir->get_num_state_slots()) { 1361 fail_link(this->shader_program, 1362 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1363 ir->name, dst.index - storage->index, 1364 type_size(ir->type)); 1365 } 1366 } 1367} 1368 1369void 1370glsl_to_tgsi_visitor::visit(ir_loop *ir) 1371{ 1372 emit_asm(NULL, TGSI_OPCODE_BGNLOOP); 1373 1374 visit_exec_list(&ir->body_instructions, this); 1375 1376 emit_asm(NULL, TGSI_OPCODE_ENDLOOP); 1377} 1378 1379void 1380glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 1381{ 1382 switch (ir->mode) { 1383 case ir_loop_jump::jump_break: 1384 emit_asm(NULL, TGSI_OPCODE_BRK); 1385 break; 1386 case ir_loop_jump::jump_continue: 1387 emit_asm(NULL, TGSI_OPCODE_CONT); 1388 break; 1389 } 1390} 1391 1392 1393void 1394glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1395{ 1396 assert(0); 1397 (void)ir; 1398} 1399 1400void 1401glsl_to_tgsi_visitor::visit(ir_function *ir) 1402{ 1403 /* Ignore function bodies other than main() -- we shouldn't see calls to 1404 * them since they should all be inlined before we get to glsl_to_tgsi. 1405 */ 1406 if (strcmp(ir->name, "main") == 0) { 1407 const ir_function_signature *sig; 1408 exec_list empty; 1409 1410 sig = ir->matching_signature(NULL, &empty, false); 1411 1412 assert(sig); 1413 1414 foreach_in_list(ir_instruction, ir, &sig->body) { 1415 ir->accept(this); 1416 } 1417 } 1418} 1419 1420bool 1421glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1422{ 1423 int nonmul_operand = 1 - mul_operand; 1424 st_src_reg a, b, c; 1425 st_dst_reg result_dst; 1426 1427 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1428 if (!expr || expr->operation != ir_binop_mul) 1429 return false; 1430 1431 expr->operands[0]->accept(this); 1432 a = this->result; 1433 expr->operands[1]->accept(this); 1434 b = this->result; 1435 ir->operands[nonmul_operand]->accept(this); 1436 c = this->result; 1437 1438 this->result = get_temp(ir->type); 1439 result_dst = st_dst_reg(this->result); 1440 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1441 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1442 1443 return true; 1444} 1445 1446/** 1447 * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) 1448 * 1449 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 1450 * implemented using multiplication, and logical-or is implemented using 1451 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 1452 * As result, the logical expression (a & !b) can be rewritten as: 1453 * 1454 * - a * !b 1455 * - a * (1 - b) 1456 * - (a * 1) - (a * b) 1457 * - a + -(a * b) 1458 * - a + (a * -b) 1459 * 1460 * This final expression can be implemented as a single MAD(a, -b, a) 1461 * instruction. 
/**
 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
 *
 * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
 * implemented using multiplication, and logical-or is implemented using
 * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
 * As a result, the logical expression (a & !b) can be rewritten as:
 *
 *     - a * !b
 *     - a * (1 - b)
 *     - (a * 1) - (a * b)
 *     - a + -(a * b)
 *     - a + (a * -b)
 *
 * This final expression can be implemented as a single MAD(a, -b, a)
 * instruction.
 */
bool
glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
{
   const int other_operand = 1 - try_operand;
   st_src_reg a, b;

   ir_expression *expr = ir->operands[try_operand]->as_expression();
   if (!expr || expr->operation != ir_unop_logic_not)
      return false;

   ir->operands[other_operand]->accept(this);
   a = this->result;
   expr->operands[0]->accept(this);
   b = this->result;

   b.negate = ~b.negate;

   this->result = get_temp(ir->type);
   emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);

   return true;
}

void
glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
                                      st_src_reg *reg, int *num_reladdr)
{
   if (!reg->reladdr && !reg->reladdr2)
      return;

   if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
   if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);

   if (*num_reladdr != 1) {
      st_src_reg temp = get_temp(reg->type == GLSL_TYPE_DOUBLE ? glsl_type::dvec4_type : glsl_type::vec4_type);

      emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
      *reg = temp;
   }

   (*num_reladdr)--;
}

void
glsl_to_tgsi_visitor::visit(ir_expression *ir)
{
   st_src_reg op[ARRAY_SIZE(ir->operands)];

   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
    */
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 1))
         return;
      if (try_emit_mad(ir, 0))
         return;
   }

   /* Quick peephole: Emit MAD(a, -b, a) instead of AND(a, NOT(b))
    */
   if (!native_integers && ir->operation == ir_binop_logic_and) {
      if (try_emit_mad_for_and_not(ir, 1))
         return;
      if (try_emit_mad_for_and_not(ir, 0))
         return;
   }

   if (ir->operation == ir_quadop_vector)
      assert(!"ir_quadop_vector should have been lowered");

   for (unsigned int operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = PROGRAM_UNDEFINED;
      ir->operands[operand]->accept(this);
      if (this->result.file == PROGRAM_UNDEFINED) {
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         printf("\n");
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   visit_expression(ir, op);
}

/* The non-recursive part of the expression visitor lives in a separate
 * function and should be prevented from being inlined, to avoid a stack
 * explosion when deeply nested expressions are visited.
 */
void
glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
{
   st_src_reg result_src;
   st_dst_reg result_dst;

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = PROGRAM_UNDEFINED;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(ir->type);
   /* convenience for the emit functions below. */
   result_dst = st_dst_reg(result_src);
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      if (result_dst.type != GLSL_TYPE_FLOAT)
         emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
      else {
         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
          * older GPUs implement SEQ using multiple instructions (i915 uses two
          * SGE instructions and a MUL instruction).  Since our logic values are
          * 0.0 and 1.0, 1-x also implements !x.
          */
         op[0].negate = ~op[0].negate;
         emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
      }
      break;
   case ir_unop_neg:
      if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
         emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
      else if (result_dst.type == GLSL_TYPE_DOUBLE)
         emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
      else {
         op[0].negate = ~op[0].negate;
         result_src = op[0];
      }
      break;
   case ir_unop_subroutine_to_int:
      emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
      break;
   case ir_unop_abs:
      if (result_dst.type == GLSL_TYPE_FLOAT)
         emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
      else if (result_dst.type == GLSL_TYPE_DOUBLE)
         emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]);
      else
         emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]);
      break;
   case ir_unop_sign:
      emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
      break;
   case ir_unop_rcp:
      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_log2:
      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
      break;
   case ir_unop_sin:
      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
      break;
   case ir_unop_saturate: {
      glsl_to_tgsi_instruction *inst;
      inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
      inst->saturate = true;
      break;
   }

   case ir_unop_dFdx:
   case ir_unop_dFdx_coarse:
      emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
      break;
   case ir_unop_dFdx_fine:
      emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
      break;
   case ir_unop_dFdy:
   case ir_unop_dFdy_coarse:
   case ir_unop_dFdy_fine:
   {
      /* The X component contains 1 or -1 depending on whether the framebuffer
       * is a FBO or the window system buffer, respectively.
       * It is then multiplied by the source operand of DDY.
       */
      static const gl_state_index transform_y_state[STATE_LENGTH]
         = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };

      unsigned transform_y_index =
         _mesa_add_state_reference(this->prog->Parameters,
                                   transform_y_state);

      st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
                                          transform_y_index,
                                          glsl_type::vec4_type);
      transform_y.swizzle = SWIZZLE_XXXX;

      st_src_reg temp = get_temp(glsl_type::vec4_type);

      emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
      emit_asm(ir, ir->operation == ir_unop_dFdy_fine ?
               TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp);
      break;
   }
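   /*
    * The dFdy lowering above emits roughly this sequence (sketch; register
    * names arbitrary):
    *
    *    MUL TEMP[t], STATE[FB_WPOS_Y_TRANSFORM].xxxx, src
    *    DDY dst, TEMP[t]
    *
    * so a window-system framebuffer, whose Y axis is flipped relative to
    * an FBO, still yields derivatives with the expected sign.
    */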

   case ir_unop_frexp_sig:
      emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
      break;

   case ir_unop_frexp_exp:
      emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
      break;

   case ir_unop_noise: {
      /* At some point, a motivated person could add a better
       * implementation of noise.  Currently not even the nvidia
       * binary drivers do anything more than this.  In any case, the
       * place to do this is in the GL state tracker, not the poor
       * driver.
       */
      emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
      break;
   }

   case ir_binop_add:
      emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      op[1].negate = ~op[1].negate;
      emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
      break;

   case ir_binop_mul:
      emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
      break;
   case ir_binop_mod:
      if (result_dst.type == GLSL_TYPE_FLOAT)
         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      else
         emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
      break;

   case ir_binop_less:
      emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
      break;
   case ir_binop_greater:
      emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
      break;
   case ir_binop_lequal:
      emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
      break;
   case ir_binop_gequal:
      emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
      break;
   case ir_binop_equal:
      emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
      break;
   case ir_binop_nequal:
      emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
      break;
   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         st_src_reg temp = get_temp(native_integers ?
                                    glsl_type::uvec4_type :
                                    glsl_type::vec4_type);

         if (native_integers) {
            st_dst_reg temp_dst = st_dst_reg(temp);
            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);

            if (ir->operands[0]->type->is_boolean() &&
                ir->operands[1]->as_constant() &&
                ir->operands[1]->as_constant()->is_one()) {
               emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
            } else {
               emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
            }

            /* Emit 1-3 AND operations to combine the SEQ results. */
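            /*
             * For a vec4 comparison the reduction below looks like this
             * (sketch; t holds the per-channel SEQ result):
             *
             *    AND t.x, t.xxxx, t.yyyy
             *    AND t.y, t.zzzz, t.wwww
             *    AND r,   t.xxxx, t.yyyy
             *
             * The vec2 case skips straight to the final AND.
             */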
            switch (ir->operands[0]->type->vector_elements) {
            case 2:
               break;
            case 3:
               temp_dst.writemask = WRITEMASK_Y;
               temp1.swizzle = SWIZZLE_YYYY;
               temp2.swizzle = SWIZZLE_ZZZZ;
               emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
               break;
            case 4:
               temp_dst.writemask = WRITEMASK_X;
               temp1.swizzle = SWIZZLE_XXXX;
               temp2.swizzle = SWIZZLE_YYYY;
               emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
               temp_dst.writemask = WRITEMASK_Y;
               temp1.swizzle = SWIZZLE_ZZZZ;
               temp2.swizzle = SWIZZLE_WWWW;
               emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
            }

            temp1.swizzle = SWIZZLE_XXXX;
            temp2.swizzle = SWIZZLE_YYYY;
            emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
         } else {
            emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);

            /* After the dot-product, the value will be an integer on the
             * range [0,4].  Zero becomes 1.0, and positive values become zero.
             */
            emit_dp(ir, result_dst, temp, temp, vector_elements);

            /* Negating the result of the dot-product gives values on the range
             * [-4, 0].  Zero becomes 1.0, and negative values become zero.
             * This is achieved using SGE.
             */
            st_src_reg sge_src = result_src;
            sge_src.negate = ~sge_src.negate;
            emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
         }
      } else {
         emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
      }
      break;
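   /*
    * ir_binop_any_nequal below is the exact dual of the case above: SNE
    * instead of SEQ, an OR tree instead of an AND tree when native integers
    * are available, and otherwise a dot-product whose nonzero result is
    * normalized to 1.0 with a free saturate (fragment shaders) or an SLT.
    */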
 */
               dp->saturate = true;
            } else {
               /* Negating the result of the dot-product gives values on the
                * range [-4, 0]. Zero stays zero, and negative values become
                * 1.0. This is achieved using SLT.
                */
               st_src_reg slt_src = result_src;
               slt_src.negate = ~slt_src.negate;
               emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
            }
         }
      } else {
         emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
      }
      break;

   case ir_binop_logic_xor:
      if (native_integers)
         emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
      else
         emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or: {
      if (native_integers) {
         /* If integers are used as booleans, we can use an actual "or"
          * instruction.
          */
         assert(native_integers);
         emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
      } else {
         /* After the addition, the value will be an integer on the
          * range [0,2]. Zero stays zero, and positive values become 1.0.
          */
         glsl_to_tgsi_instruction *add =
            emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
            /* The clamping to [0,1] can be done for free in the fragment
             * shader with a saturate if floats are being used as boolean
             * values.
             */
            add->saturate = true;
         } else {
            /* Negating the result of the addition gives values on the range
             * [-2, 0]. Zero stays zero, and negative values become 1.0. This
             * is achieved using SLT.
             */
            st_src_reg slt_src = result_src;
            slt_src.negate = ~slt_src.negate;
            emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
         }
      }
      break;
   }

   case ir_binop_logic_and:
      /* If native integers are disabled, the bool args are stored as float
       * 0.0 or 1.0, so "mul" gives us "and". If they're enabled, just use
       * the actual AND opcode.
       */
      if (native_integers)
         emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
      else
         emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(ir, result_dst, op[0], op[1],
              ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      if (have_sqrt) {
         emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
      } else {
         /* This is the only instruction sequence that makes the game "Risen"
          * render correctly. ABS is not required for the game, but since
          * GLSL declares negative values as "undefined", allowing us to do
          * whatever we want, I choose to use ABS to match DX9 and pre-GLSL
          * RSQ behavior.
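          * The sequence below computes sqrt(a) as RCP(RSQ(|a|)).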
1938 */ 1939 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0].get_abs()); 1940 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src); 1941 } 1942 break; 1943 case ir_unop_rsq: 1944 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1945 break; 1946 case ir_unop_i2f: 1947 if (native_integers) { 1948 emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1949 break; 1950 } 1951 /* fallthrough to next case otherwise */ 1952 case ir_unop_b2f: 1953 if (native_integers) { 1954 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); 1955 break; 1956 } 1957 /* fallthrough to next case otherwise */ 1958 case ir_unop_i2u: 1959 case ir_unop_u2i: 1960 /* Converting between signed and unsigned integers is a no-op. */ 1961 result_src = op[0]; 1962 result_src.type = result_dst.type; 1963 break; 1964 case ir_unop_b2i: 1965 if (native_integers) { 1966 /* Booleans are stored as integers using ~0 for true and 0 for false. 1967 * GLSL requires that int(bool) return 1 for true and 0 for false. 1968 * This conversion is done with AND, but it could be done with NEG. 1969 */ 1970 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); 1971 } else { 1972 /* Booleans and integers are both stored as floats when native 1973 * integers are disabled. 1974 */ 1975 result_src = op[0]; 1976 } 1977 break; 1978 case ir_unop_f2i: 1979 if (native_integers) 1980 emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1981 else 1982 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1983 break; 1984 case ir_unop_f2u: 1985 if (native_integers) 1986 emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]); 1987 else 1988 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1989 break; 1990 case ir_unop_bitcast_f2i: 1991 case ir_unop_bitcast_f2u: 1992 /* Make sure we don't propagate the negate modifier to integer opcodes. */ 1993 if (op[0].negate || op[0].abs) 1994 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); 1995 else 1996 result_src = op[0]; 1997 result_src.type = ir->operation == ir_unop_bitcast_f2i ? 
GLSL_TYPE_INT : 1998 GLSL_TYPE_UINT; 1999 break; 2000 case ir_unop_bitcast_i2f: 2001 case ir_unop_bitcast_u2f: 2002 result_src = op[0]; 2003 result_src.type = GLSL_TYPE_FLOAT; 2004 break; 2005 case ir_unop_f2b: 2006 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 2007 break; 2008 case ir_unop_d2b: 2009 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0)); 2010 break; 2011 case ir_unop_i2b: 2012 if (native_integers) 2013 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0)); 2014 else 2015 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 2016 break; 2017 case ir_unop_trunc: 2018 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 2019 break; 2020 case ir_unop_ceil: 2021 emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); 2022 break; 2023 case ir_unop_floor: 2024 emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 2025 break; 2026 case ir_unop_round_even: 2027 emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); 2028 break; 2029 case ir_unop_fract: 2030 emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 2031 break; 2032 2033 case ir_binop_min: 2034 emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 2035 break; 2036 case ir_binop_max: 2037 emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 2038 break; 2039 case ir_binop_pow: 2040 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 2041 break; 2042 2043 case ir_unop_bit_not: 2044 if (native_integers) { 2045 emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 2046 break; 2047 } 2048 case ir_unop_u2f: 2049 if (native_integers) { 2050 emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 2051 break; 2052 } 2053 case ir_binop_lshift: 2054 if (native_integers) { 2055 emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); 2056 break; 2057 } 2058 case ir_binop_rshift: 2059 if (native_integers) { 2060 emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]); 2061 break; 2062 } 2063 case ir_binop_bit_and: 2064 if (native_integers) { 2065 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 2066 break; 2067 } 2068 case ir_binop_bit_xor: 2069 if (native_integers) { 2070 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 2071 break; 2072 } 2073 case ir_binop_bit_or: 2074 if (native_integers) { 2075 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 2076 break; 2077 } 2078 2079 assert(!"GLSL 1.30 features unsupported"); 2080 break; 2081 2082 case ir_binop_ubo_load: { 2083 ir_constant *const_uniform_block = ir->operands[0]->as_constant(); 2084 ir_constant *const_offset_ir = ir->operands[1]->as_constant(); 2085 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0; 2086 unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 0; 2087 st_src_reg index_reg = get_temp(glsl_type::uint_type); 2088 st_src_reg cbuf; 2089 2090 cbuf.type = ir->type->base_type; 2091 cbuf.file = PROGRAM_CONSTANT; 2092 cbuf.index = 0; 2093 cbuf.reladdr = NULL; 2094 cbuf.negate = 0; 2095 cbuf.abs = 0; 2096 2097 assert(ir->type->is_vector() || ir->type->is_scalar()); 2098 2099 if (const_offset_ir) { 2100 /* Constant index into constant buffer */ 2101 cbuf.reladdr = NULL; 2102 cbuf.index = const_offset / 16; 2103 } 2104 else { 2105 ir_expression *offset_expr = ir->operands[1]->as_expression(); 2106 st_src_reg offset = op[1]; 2107 2108 /* The OpenGL spec is written in such a way that accesses with 2109 * non-constant offset are almost always vec4-aligned. 
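          * (std140 rounds array strides and struct base alignments up to a
          * multiple of 16 bytes, which is what makes non-constant offsets
          * vec4-aligned in the first place.)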
          * The only exceptions to this are members of structs in arrays of
          * structs: each struct in an array of structs is at least
          * vec4-aligned, but single-element and [ui]vec2 members of the
          * struct may be at an offset that is not a multiple of 16 bytes.
          *
          * Here, we extract that offset, relying on previous passes to
          * always generate offset expressions of the form
          * (+ expr constant_offset).
          *
          * Note that the std430 layout, which allows more cases of
          * alignment less than vec4 in arrays, is not supported for
          * uniform blocks, so we do not have to deal with it here.
          */
         if (offset_expr && offset_expr->operation == ir_binop_add) {
            const_offset_ir = offset_expr->operands[1]->as_constant();
            if (const_offset_ir) {
               const_offset = const_offset_ir->value.u[0];
               cbuf.index = const_offset / 16;
               offset_expr->operands[0]->accept(this);
               offset = this->result;
            }
         }

         /* Relative/variable index into constant buffer */
         emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset,
                  st_src_reg_for_int(4));
         cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
         memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
      }

      if (const_uniform_block) {
         /* Constant constant-buffer index */
         cbuf.reladdr2 = NULL;
         cbuf.index2D = const_block;
         cbuf.has_index2 = true;
      }
      else {
         /* Relative/variable constant buffer */
         cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
         cbuf.index2D = 1;
         memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
         cbuf.has_index2 = true;
      }

      cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
      if (glsl_base_type_is_64bit(cbuf.type))
         cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
                                       const_offset % 16 / 8,
                                       const_offset % 16 / 8,
                                       const_offset % 16 / 8);
      else
         cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
                                       const_offset % 16 / 4,
                                       const_offset % 16 / 4,
                                       const_offset % 16 / 4);

      if (ir->type->base_type == GLSL_TYPE_BOOL) {
         emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
      } else {
         emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
      }
      break;
   }
   case ir_triop_lrp:
      /* note: we have to reorder the three args here, since TGSI's LRP
       * takes the interpolation factor as its first operand */
      emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
      break;
   case ir_triop_csel:
      if (this->ctx->Const.NativeIntegers)
         emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
      else {
         op[0].negate = ~op[0].negate;
         emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
      }
      break;
   case ir_triop_bitfield_extract:
      emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
      break;
   case ir_quadop_bitfield_insert:
      emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
      break;
   case ir_unop_bitfield_reverse:
      emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
      break;
   case ir_unop_bit_count:
      emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
      break;
   case ir_unop_find_msb:
      emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
      break;
   case ir_unop_find_lsb:
      emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
      break;
   case ir_binop_imul_high:
      emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
      break;
   case ir_triop_fma:
      /* In theory, MAD is incorrect here.
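       * MAD may round the intermediate multiply, whereas fma() is meant to
       * be a single fused multiply-add; when native FMA is unavailable
       * there is no better option, so MAD is the pragmatic fallback below.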
*/ 2207 if (have_fma) 2208 emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); 2209 else 2210 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); 2211 break; 2212 case ir_unop_interpolate_at_centroid: 2213 emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); 2214 break; 2215 case ir_binop_interpolate_at_offset: { 2216 /* The y coordinate needs to be flipped for the default fb */ 2217 static const gl_state_index transform_y_state[STATE_LENGTH] 2218 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; 2219 2220 unsigned transform_y_index = 2221 _mesa_add_state_reference(this->prog->Parameters, 2222 transform_y_state); 2223 2224 st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR, 2225 transform_y_index, 2226 glsl_type::vec4_type); 2227 transform_y.swizzle = SWIZZLE_XXXX; 2228 2229 st_src_reg temp = get_temp(glsl_type::vec2_type); 2230 st_dst_reg temp_dst = st_dst_reg(temp); 2231 2232 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[1]); 2233 temp_dst.writemask = WRITEMASK_Y; 2234 emit_asm(ir, TGSI_OPCODE_MUL, temp_dst, transform_y, op[1]); 2235 emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], temp); 2236 break; 2237 } 2238 case ir_binop_interpolate_at_sample: 2239 emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]); 2240 break; 2241 2242 case ir_unop_d2f: 2243 emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]); 2244 break; 2245 case ir_unop_f2d: 2246 emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]); 2247 break; 2248 case ir_unop_d2i: 2249 emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]); 2250 break; 2251 case ir_unop_i2d: 2252 emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]); 2253 break; 2254 case ir_unop_d2u: 2255 emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]); 2256 break; 2257 case ir_unop_u2d: 2258 emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]); 2259 break; 2260 case ir_unop_unpack_double_2x32: 2261 case ir_unop_pack_double_2x32: 2262 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); 2263 break; 2264 2265 case ir_binop_ldexp: 2266 if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) { 2267 emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]); 2268 } else { 2269 assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()"); 2270 } 2271 break; 2272 2273 case ir_unop_pack_half_2x16: 2274 emit_asm(ir, TGSI_OPCODE_PK2H, result_dst, op[0]); 2275 break; 2276 case ir_unop_unpack_half_2x16: 2277 emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]); 2278 break; 2279 2280 case ir_unop_get_buffer_size: { 2281 ir_constant *const_offset = ir->operands[0]->as_constant(); 2282 st_src_reg buffer( 2283 PROGRAM_BUFFER, 2284 ctx->Const.Program[shader->Stage].MaxAtomicBuffers + 2285 (const_offset ? 
                       const_offset->value.u[0] : 0),
         GLSL_TYPE_UINT);
      if (!const_offset) {
         buffer.reladdr = ralloc(mem_ctx, st_src_reg);
         *buffer.reladdr = op[0];
         emit_arl(ir, sampler_reladdr, op[0]);
      }
      emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->resource = buffer;
      break;
   }

   case ir_unop_vote_any:
      emit_asm(ir, TGSI_OPCODE_VOTE_ANY, result_dst, op[0]);
      break;
   case ir_unop_vote_all:
      emit_asm(ir, TGSI_OPCODE_VOTE_ALL, result_dst, op[0]);
      break;
   case ir_unop_vote_eq:
      emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]);
      break;

   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_4x8:

   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_4x8:

   case ir_quadop_vector:
   case ir_binop_vector_extract:
   case ir_triop_vector_insert:
   case ir_binop_carry:
   case ir_binop_borrow:
   case ir_unop_ssbo_unsized_array_length:
      /* This operation is not supported, or should have already been
       * handled.
       */
      assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
      break;
   }

   this->result = result_src;
}


void
glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
{
   st_src_reg src;
   int i;
   int swizzle[4];

   /* Note that this handles only swizzles in expressions, not those on the
    * left hand side of an assignment, which do write masking. See
    * ir_assignment for that.
    */

   ir->val->accept(this);
   src = this->result;
   assert(src.file != PROGRAM_UNDEFINED);
   assert(ir->type->vector_elements > 0);

   for (i = 0; i < 4; i++) {
      if (i < ir->type->vector_elements) {
         switch (i) {
         case 0:
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
            break;
         case 1:
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
            break;
         case 2:
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
            break;
         case 3:
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
            break;
         }
      } else {
         /* If the type is smaller than a vec4, replicate the last
          * channel out.
          */
         swizzle[i] = swizzle[ir->type->vector_elements - 1];
      }
   }

   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}

/* Test if the variable is an array. Note that geometry and
 * tessellation shader inputs and outputs are always arrays (except
 * for patch inputs), so only the array element type is considered.
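 * For example, a TES input `in vec4 foo[];` has element type vec4 and is
 * not treated as an array here, while `in vec4 bar[][2];` (element type
 * vec4[2]) or a matrix element type is.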
 */
static bool
is_inout_array(unsigned stage, ir_variable *var, bool *remove_array)
{
   const glsl_type *type = var->type;

   *remove_array = false;

   if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
       (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out))
      return false;

   if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) ||
        (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) ||
        stage == MESA_SHADER_TESS_CTRL) &&
       !var->data.patch) {
      if (!var->type->is_array())
         return false; /* probably a system value */

      type = var->type->fields.array;
      *remove_array = true;
   }

   return type->is_array() || type->is_matrix();
}

static unsigned
st_translate_interp_loc(ir_variable *var)
{
   if (var->data.centroid)
      return TGSI_INTERPOLATE_LOC_CENTROID;
   else if (var->data.sample)
      return TGSI_INTERPOLATE_LOC_SAMPLE;
   else
      return TGSI_INTERPOLATE_LOC_CENTER;
}

void
glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
{
   variable_storage *entry = find_variable_storage(ir->var);
   ir_variable *var = ir->var;
   bool remove_array;

   if (!entry) {
      switch (var->data.mode) {
      case ir_var_uniform:
         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
                                               var->data.param_index);
         this->variables.push_tail(entry);
         break;
      case ir_var_shader_in: {
         /* The linker assigns locations for varyings and attributes,
          * including deprecated builtins (like gl_Color), user-assigned
          * generic attributes (glBindAttribLocation), and
          * user-defined varyings.
          */
         assert(var->data.location != -1);

         const glsl_type *type_without_array = var->type->without_array();
         struct inout_decl *decl = &inputs[num_inputs];
         unsigned component = var->data.location_frac;
         unsigned num_components;
         num_inputs++;

         if (type_without_array->is_64bit())
            component = component / 2;
         if (type_without_array->vector_elements)
            num_components = type_without_array->vector_elements;
         else
            num_components = 4;

         decl->mesa_index = var->data.location;
         decl->interp = (glsl_interp_mode) var->data.interpolation;
         decl->interp_loc = st_translate_interp_loc(var);
         decl->base_type = type_without_array->base_type;
         decl->usage_mask = u_bit_consecutive(component, num_components);

         if (is_inout_array(shader->Stage, var, &remove_array)) {
            decl->array_id = num_input_arrays + 1;
            num_input_arrays++;
         } else {
            decl->array_id = 0;
         }

         if (remove_array)
            decl->size = type_size(var->type->fields.array);
         else
            decl->size = type_size(var->type);

         entry = new(mem_ctx) variable_storage(var,
                                               PROGRAM_INPUT,
                                               decl->mesa_index,
                                               decl->array_id);
         entry->component = component;

         this->variables.push_tail(entry);
         break;
      }
      case ir_var_shader_out: {
         assert(var->data.location != -1);

         const glsl_type *type_without_array = var->type->without_array();
         struct inout_decl *decl = &outputs[num_outputs];
         unsigned component = var->data.location_frac;
         unsigned num_components;
         num_outputs++;

         if (type_without_array->is_64bit())
            component = component / 2;
         if (type_without_array->vector_elements)
            num_components =
type_without_array->vector_elements; 2493 else 2494 num_components = 4; 2495 2496 decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index; 2497 decl->base_type = type_without_array->base_type; 2498 decl->usage_mask = u_bit_consecutive(component, num_components); 2499 if (var->data.stream & (1u << 31)) { 2500 decl->gs_out_streams = var->data.stream & ~(1u << 31); 2501 } else { 2502 assert(var->data.stream < 4); 2503 decl->gs_out_streams = 0; 2504 for (unsigned i = 0; i < num_components; ++i) 2505 decl->gs_out_streams |= var->data.stream << (2 * (component + i)); 2506 } 2507 2508 if (is_inout_array(shader->Stage, var, &remove_array)) { 2509 decl->array_id = num_output_arrays + 1; 2510 num_output_arrays++; 2511 } else { 2512 decl->array_id = 0; 2513 } 2514 2515 if (remove_array) 2516 decl->size = type_size(var->type->fields.array); 2517 else 2518 decl->size = type_size(var->type); 2519 2520 if (var->data.fb_fetch_output) { 2521 st_dst_reg dst = st_dst_reg(get_temp(var->type)); 2522 st_src_reg src = st_src_reg(PROGRAM_OUTPUT, decl->mesa_index, 2523 var->type, component, decl->array_id); 2524 emit_asm(NULL, TGSI_OPCODE_FBFETCH, dst, src); 2525 entry = new(mem_ctx) variable_storage(var, dst.file, dst.index, 2526 dst.array_id); 2527 } else { 2528 entry = new(mem_ctx) variable_storage(var, 2529 PROGRAM_OUTPUT, 2530 decl->mesa_index, 2531 decl->array_id); 2532 } 2533 entry->component = component; 2534 2535 this->variables.push_tail(entry); 2536 break; 2537 } 2538 case ir_var_system_value: 2539 entry = new(mem_ctx) variable_storage(var, 2540 PROGRAM_SYSTEM_VALUE, 2541 var->data.location); 2542 break; 2543 case ir_var_auto: 2544 case ir_var_temporary: 2545 st_src_reg src = get_temp(var->type); 2546 2547 entry = new(mem_ctx) variable_storage(var, src.file, src.index, 2548 src.array_id); 2549 this->variables.push_tail(entry); 2550 2551 break; 2552 } 2553 2554 if (!entry) { 2555 printf("Failed to make storage for %s\n", var->name); 2556 exit(1); 2557 } 2558 } 2559 2560 this->result = st_src_reg(entry->file, entry->index, var->type, 2561 entry->component, entry->array_id); 2562 if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in && var->type->is_double()) 2563 this->result.is_double_vertex_input = true; 2564 if (!native_integers) 2565 this->result.type = GLSL_TYPE_FLOAT; 2566} 2567 2568static void 2569shrink_array_declarations(struct inout_decl *decls, unsigned count, 2570 GLbitfield64* usage_mask, 2571 GLbitfield64 double_usage_mask, 2572 GLbitfield* patch_usage_mask) 2573{ 2574 unsigned i; 2575 int j; 2576 2577 /* Fix array declarations by removing unused array elements at both ends 2578 * of the arrays. For example, mat4[3] where only mat[1] is used. 2579 */ 2580 for (i = 0; i < count; i++) { 2581 struct inout_decl *decl = &decls[i]; 2582 if (!decl->array_id) 2583 continue; 2584 2585 /* Shrink the beginning. */ 2586 for (j = 0; j < (int)decl->size; j++) { 2587 if (decl->mesa_index >= VARYING_SLOT_PATCH0) { 2588 if (*patch_usage_mask & 2589 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) 2590 break; 2591 } 2592 else { 2593 if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) 2594 break; 2595 if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1)) 2596 break; 2597 } 2598 2599 decl->mesa_index++; 2600 decl->size--; 2601 j--; 2602 } 2603 2604 /* Shrink the end. 
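       * (This mirrors the loop above, trimming unused trailing elements.)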
*/ 2605 for (j = decl->size-1; j >= 0; j--) { 2606 if (decl->mesa_index >= VARYING_SLOT_PATCH0) { 2607 if (*patch_usage_mask & 2608 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) 2609 break; 2610 } 2611 else { 2612 if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) 2613 break; 2614 if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1)) 2615 break; 2616 } 2617 2618 decl->size--; 2619 } 2620 2621 /* When not all entries of an array are accessed, we mark them as used 2622 * here anyway, to ensure that the input/output mapping logic doesn't get 2623 * confused. 2624 * 2625 * TODO This happens when an array isn't used via indirect access, which 2626 * some game ports do (at least eON-based). There is an optimization 2627 * opportunity here by replacing the array declaration with non-array 2628 * declarations of those slots that are actually used. 2629 */ 2630 for (j = 1; j < (int)decl->size; ++j) { 2631 if (decl->mesa_index >= VARYING_SLOT_PATCH0) 2632 *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j); 2633 else 2634 *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j); 2635 } 2636 } 2637} 2638 2639void 2640glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 2641{ 2642 ir_constant *index; 2643 st_src_reg src; 2644 int element_size = type_size(ir->type); 2645 bool is_2D = false; 2646 2647 index = ir->array_index->constant_expression_value(); 2648 2649 ir->array->accept(this); 2650 src = this->result; 2651 2652 if (ir->array->ir_type != ir_type_dereference_array) { 2653 switch (this->prog->Target) { 2654 case GL_TESS_CONTROL_PROGRAM_NV: 2655 is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) && 2656 !ir->variable_referenced()->data.patch; 2657 break; 2658 case GL_TESS_EVALUATION_PROGRAM_NV: 2659 is_2D = src.file == PROGRAM_INPUT && 2660 !ir->variable_referenced()->data.patch; 2661 break; 2662 case GL_GEOMETRY_PROGRAM_NV: 2663 is_2D = src.file == PROGRAM_INPUT; 2664 break; 2665 } 2666 } 2667 2668 if (is_2D) 2669 element_size = 1; 2670 2671 if (index) { 2672 2673 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && 2674 src.file == PROGRAM_INPUT) 2675 element_size = attrib_type_size(ir->type, true); 2676 if (is_2D) { 2677 src.index2D = index->value.i[0]; 2678 src.has_index2 = true; 2679 } else 2680 src.index += index->value.i[0] * element_size; 2681 } else { 2682 /* Variable index array dereference. It eats the "vec4" of the 2683 * base of the array and an index that offsets the TGSI register 2684 * index. 2685 */ 2686 ir->array_index->accept(this); 2687 2688 st_src_reg index_reg; 2689 2690 if (element_size == 1) { 2691 index_reg = this->result; 2692 } else { 2693 index_reg = get_temp(native_integers ? 2694 glsl_type::int_type : glsl_type::float_type); 2695 2696 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 2697 this->result, st_src_reg_for_type(index_reg.type, element_size)); 2698 } 2699 2700 /* If there was already a relative address register involved, add the 2701 * new and the old together to get the new offset. 2702 */ 2703 if (!is_2D && src.reladdr != NULL) { 2704 st_src_reg accum_reg = get_temp(native_integers ? 
                                      glsl_type::int_type : glsl_type::float_type);

         emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
                  index_reg, *src.reladdr);

         index_reg = accum_reg;
      }

      if (is_2D) {
         src.reladdr2 = ralloc(mem_ctx, st_src_reg);
         memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
         src.index2D = 0;
         src.has_index2 = true;
      } else {
         src.reladdr = ralloc(mem_ctx, st_src_reg);
         memcpy(src.reladdr, &index_reg, sizeof(index_reg));
      }
   }

   /* Change the register type to the element type of the array. */
   src.type = ir->type->base_type;

   this->result = src;
}

void
glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = SWIZZLE_NOOP;

   this->result.index += offset;
   this->result.type = ir->type->base_type;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static st_dst_reg
get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v, int *component)
{
   /* The LHS must be a dereference. If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series of conditional
    * moves before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part. We write swizzles using
    * the writemask, but we do extract the base component for enhanced layouts
    * from the source swizzle.
    */
   ir->accept(v);
   *component = GET_SWZ(v->result.swizzle, 0);
   return st_dst_reg(v->result);
}

/**
 * Process the condition of a conditional assignment
 *
 * Examines the condition of a conditional assignment to generate the optimal
 * first operand of a \c CMP instruction. If the condition is a relational
 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
 * used as the source for the \c CMP instruction. Otherwise the comparison
 * is processed to a boolean result, and the boolean result is used as the
 * operand to the CMP instruction.
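 *
 * For example, for `(a < 0.0) ? b : c` the value `a` can be fed directly
 * to CMP, which selects its second operand when the first is negative,
 * avoiding a separate instruction that materializes the boolean a < 0.0.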
 */
bool
glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
{
   ir_rvalue *src_ir = ir;
   bool negate = true;
   bool switch_order = false;

   ir_expression *const expr = ir->as_expression();

   if (native_integers) {
      if ((expr != NULL) && (expr->get_num_operands() == 2)) {
         enum glsl_base_type type = expr->operands[0]->type->base_type;
         if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
             type == GLSL_TYPE_BOOL) {
            if (expr->operation == ir_binop_equal) {
               if (expr->operands[0]->is_zero()) {
                  src_ir = expr->operands[1];
                  switch_order = true;
               }
               else if (expr->operands[1]->is_zero()) {
                  src_ir = expr->operands[0];
                  switch_order = true;
               }
            }
            else if (expr->operation == ir_binop_nequal) {
               if (expr->operands[0]->is_zero()) {
                  src_ir = expr->operands[1];
               }
               else if (expr->operands[1]->is_zero()) {
                  src_ir = expr->operands[0];
               }
            }
         }
      }

      src_ir->accept(this);
      return switch_order;
   }

   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
      bool zero_on_left = false;

      if (expr->operands[0]->is_zero()) {
         src_ir = expr->operands[1];
         zero_on_left = true;
      } else if (expr->operands[1]->is_zero()) {
         src_ir = expr->operands[0];
         zero_on_left = false;
      }

      /*  a is    -  0  +            -  0  +
       * (a <  0)  T  F  F       ( a < 0)  T  F  F
       * (0 <  a)  F  F  T       (-a < 0)  F  F  T
       * (a <= 0)  T  T  F       (-a < 0)  F  F  T  (swap order of other operands)
       * (0 <= a)  F  T  T       ( a < 0)  T  F  F  (swap order of other operands)
       * (a >  0)  F  F  T       (-a < 0)  F  F  T
       * (0 >  a)  T  F  F       ( a < 0)  T  F  F
       * (a >= 0)  F  T  T       ( a < 0)  T  F  F  (swap order of other operands)
       * (0 >= a)  T  T  F       (-a < 0)  F  F  T  (swap order of other operands)
       *
       * Note that exchanging the order of 0 and 'a' in the comparison simply
       * means that the value of 'a' should be negated.
       */
      if (src_ir != ir) {
         switch (expr->operation) {
         case ir_binop_less:
            switch_order = false;
            negate = zero_on_left;
            break;

         case ir_binop_greater:
            switch_order = false;
            negate = !zero_on_left;
            break;

         case ir_binop_lequal:
            switch_order = true;
            negate = !zero_on_left;
            break;

         case ir_binop_gequal:
            switch_order = true;
            negate = zero_on_left;
            break;

         default:
            /* This isn't the right kind of comparison after all, so make
             * sure the whole condition is visited.
             */
            src_ir = ir;
            break;
         }
      }
   }

   src_ir->accept(this);

   /* We use TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
    * condition we produced is 0.0 or 1.0. By flipping the sign, we can
    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
    * computing the condition.
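    * For example, a condition written as `0.0 < a` visits `a` and sets
    * `negate`, so CMP effectively tests `-a < 0` and still selects the
    * intended operand.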
2893 */ 2894 if (negate) 2895 this->result.negate = ~this->result.negate; 2896 2897 return switch_order; 2898} 2899 2900void 2901glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type, 2902 st_dst_reg *l, st_src_reg *r, 2903 st_src_reg *cond, bool cond_swap) 2904{ 2905 if (type->base_type == GLSL_TYPE_STRUCT) { 2906 for (unsigned int i = 0; i < type->length; i++) { 2907 emit_block_mov(ir, type->fields.structure[i].type, l, r, 2908 cond, cond_swap); 2909 } 2910 return; 2911 } 2912 2913 if (type->is_array()) { 2914 for (unsigned int i = 0; i < type->length; i++) { 2915 emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap); 2916 } 2917 return; 2918 } 2919 2920 if (type->is_matrix()) { 2921 const struct glsl_type *vec_type; 2922 2923 vec_type = glsl_type::get_instance(type->is_double() ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT, 2924 type->vector_elements, 1); 2925 2926 for (int i = 0; i < type->matrix_columns; i++) { 2927 emit_block_mov(ir, vec_type, l, r, cond, cond_swap); 2928 } 2929 return; 2930 } 2931 2932 assert(type->is_scalar() || type->is_vector()); 2933 2934 l->type = type->base_type; 2935 r->type = type->base_type; 2936 if (cond) { 2937 st_src_reg l_src = st_src_reg(*l); 2938 l_src.swizzle = swizzle_for_size(type->vector_elements); 2939 2940 if (native_integers) { 2941 emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond, 2942 cond_swap ? l_src : *r, 2943 cond_swap ? *r : l_src); 2944 } else { 2945 emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond, 2946 cond_swap ? l_src : *r, 2947 cond_swap ? *r : l_src); 2948 } 2949 } else { 2950 emit_asm(ir, TGSI_OPCODE_MOV, *l, *r); 2951 } 2952 l->index++; 2953 r->index++; 2954 if (type->is_dual_slot()) { 2955 l->index++; 2956 if (r->is_double_vertex_input == false) 2957 r->index++; 2958 } 2959} 2960 2961void 2962glsl_to_tgsi_visitor::visit(ir_assignment *ir) 2963{ 2964 int dst_component; 2965 st_dst_reg l; 2966 st_src_reg r; 2967 2968 ir->rhs->accept(this); 2969 r = this->result; 2970 2971 l = get_assignment_lhs(ir->lhs, this, &dst_component); 2972 2973 { 2974 int swizzles[4]; 2975 int first_enabled_chan = 0; 2976 int rhs_chan = 0; 2977 ir_variable *variable = ir->lhs->variable_referenced(); 2978 2979 if (shader->Stage == MESA_SHADER_FRAGMENT && 2980 variable->data.mode == ir_var_shader_out && 2981 (variable->data.location == FRAG_RESULT_DEPTH || 2982 variable->data.location == FRAG_RESULT_STENCIL)) { 2983 assert(ir->lhs->type->is_scalar()); 2984 assert(ir->write_mask == WRITEMASK_X); 2985 2986 if (variable->data.location == FRAG_RESULT_DEPTH) 2987 l.writemask = WRITEMASK_Z; 2988 else { 2989 assert(variable->data.location == FRAG_RESULT_STENCIL); 2990 l.writemask = WRITEMASK_Y; 2991 } 2992 } else if (ir->write_mask == 0) { 2993 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 2994 2995 unsigned num_elements = ir->lhs->type->without_array()->vector_elements; 2996 2997 if (num_elements) { 2998 l.writemask = u_bit_consecutive(0, num_elements); 2999 } else { 3000 /* The type is a struct or an array of (array of) structs. */ 3001 l.writemask = WRITEMASK_XYZW; 3002 } 3003 } else { 3004 l.writemask = ir->write_mask; 3005 } 3006 3007 for (int i = 0; i < 4; i++) { 3008 if (l.writemask & (1 << i)) { 3009 first_enabled_chan = GET_SWZ(r.swizzle, i); 3010 break; 3011 } 3012 } 3013 3014 l.writemask = l.writemask << dst_component; 3015 3016 /* Swizzle a small RHS vector into the channels being written. 
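       * For example (with dst_component == 0), a vec2 RHS with swizzle xy
       * assigned through writemask yz becomes swizzle .yxyy, so dst.y reads
       * RHS x and dst.z reads RHS y.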
3017 * 3018 * glsl ir treats write_mask as dictating how many channels are 3019 * present on the RHS while TGSI treats write_mask as just 3020 * showing which channels of the vec4 RHS get written. 3021 */ 3022 for (int i = 0; i < 4; i++) { 3023 if (l.writemask & (1 << i)) 3024 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 3025 else 3026 swizzles[i] = first_enabled_chan; 3027 } 3028 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 3029 swizzles[2], swizzles[3]); 3030 } 3031 3032 assert(l.file != PROGRAM_UNDEFINED); 3033 assert(r.file != PROGRAM_UNDEFINED); 3034 3035 if (ir->condition) { 3036 const bool switch_order = this->process_move_condition(ir->condition); 3037 st_src_reg condition = this->result; 3038 3039 emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order); 3040 } else if (ir->rhs->as_expression() && 3041 this->instructions.get_tail() && 3042 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 3043 !((glsl_to_tgsi_instruction *)this->instructions.get_tail())->is_64bit_expanded && 3044 type_size(ir->lhs->type) == 1 && 3045 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) { 3046 /* To avoid emitting an extra MOV when assigning an expression to a 3047 * variable, emit the last instruction of the expression again, but 3048 * replace the destination register with the target of the assignment. 3049 * Dead code elimination will remove the original instruction. 3050 */ 3051 glsl_to_tgsi_instruction *inst, *new_inst; 3052 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 3053 new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]); 3054 new_inst->saturate = inst->saturate; 3055 inst->dead_mask = inst->dst[0].writemask; 3056 } else { 3057 emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false); 3058 } 3059} 3060 3061 3062void 3063glsl_to_tgsi_visitor::visit(ir_constant *ir) 3064{ 3065 st_src_reg src; 3066 GLdouble stack_vals[4] = { 0 }; 3067 gl_constant_value *values = (gl_constant_value *) stack_vals; 3068 GLenum gl_type = GL_NONE; 3069 unsigned int i; 3070 static int in_array = 0; 3071 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 3072 3073 /* Unfortunately, 4 floats is all we can get into 3074 * _mesa_add_typed_unnamed_constant. So, make a temp to store an 3075 * aggregate constant and move each constant value into it. If we 3076 * get lucky, copy propagation will eliminate the extra moves. 
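    * For example, a struct constant is emitted by visiting each field's
    * constant and MOVing it into consecutive slots of the temporary.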
3077 */ 3078 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 3079 st_src_reg temp_base = get_temp(ir->type); 3080 st_dst_reg temp = st_dst_reg(temp_base); 3081 3082 foreach_in_list(ir_constant, field_value, &ir->components) { 3083 int size = type_size(field_value->type); 3084 3085 assert(size > 0); 3086 3087 field_value->accept(this); 3088 src = this->result; 3089 3090 for (i = 0; i < (unsigned int)size; i++) { 3091 emit_asm(ir, TGSI_OPCODE_MOV, temp, src); 3092 3093 src.index++; 3094 temp.index++; 3095 } 3096 } 3097 this->result = temp_base; 3098 return; 3099 } 3100 3101 if (ir->type->is_array()) { 3102 st_src_reg temp_base = get_temp(ir->type); 3103 st_dst_reg temp = st_dst_reg(temp_base); 3104 int size = type_size(ir->type->fields.array); 3105 3106 assert(size > 0); 3107 in_array++; 3108 3109 for (i = 0; i < ir->type->length; i++) { 3110 ir->array_elements[i]->accept(this); 3111 src = this->result; 3112 for (int j = 0; j < size; j++) { 3113 emit_asm(ir, TGSI_OPCODE_MOV, temp, src); 3114 3115 src.index++; 3116 temp.index++; 3117 } 3118 } 3119 this->result = temp_base; 3120 in_array--; 3121 return; 3122 } 3123 3124 if (ir->type->is_matrix()) { 3125 st_src_reg mat = get_temp(ir->type); 3126 st_dst_reg mat_column = st_dst_reg(mat); 3127 3128 for (i = 0; i < ir->type->matrix_columns; i++) { 3129 switch (ir->type->base_type) { 3130 case GLSL_TYPE_FLOAT: 3131 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 3132 3133 src = st_src_reg(file, -1, ir->type->base_type); 3134 src.index = add_constant(file, 3135 values, 3136 ir->type->vector_elements, 3137 GL_FLOAT, 3138 &src.swizzle); 3139 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3140 break; 3141 case GLSL_TYPE_DOUBLE: 3142 values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements]; 3143 src = st_src_reg(file, -1, ir->type->base_type); 3144 src.index = add_constant(file, 3145 values, 3146 ir->type->vector_elements, 3147 GL_DOUBLE, 3148 &src.swizzle); 3149 if (ir->type->vector_elements >= 2) { 3150 mat_column.writemask = WRITEMASK_XY; 3151 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); 3152 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3153 } else { 3154 mat_column.writemask = WRITEMASK_X; 3155 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); 3156 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3157 } 3158 src.index++; 3159 if (ir->type->vector_elements > 2) { 3160 if (ir->type->vector_elements == 4) { 3161 mat_column.writemask = WRITEMASK_ZW; 3162 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); 3163 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3164 } else { 3165 mat_column.writemask = WRITEMASK_Z; 3166 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y); 3167 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3168 mat_column.writemask = WRITEMASK_XYZW; 3169 src.swizzle = SWIZZLE_XYZW; 3170 } 3171 mat_column.index++; 3172 } 3173 break; 3174 default: 3175 unreachable("Illegal matrix constant type.\n"); 3176 break; 3177 } 3178 mat_column.index++; 3179 } 3180 this->result = mat; 3181 return; 3182 } 3183 3184 switch (ir->type->base_type) { 3185 case GLSL_TYPE_FLOAT: 3186 gl_type = GL_FLOAT; 3187 for (i = 0; i < ir->type->vector_elements; i++) { 3188 values[i].f = ir->value.f[i]; 3189 } 3190 break; 3191 case GLSL_TYPE_DOUBLE: 3192 gl_type = GL_DOUBLE; 3193 for (i = 0; i < ir->type->vector_elements; i++) { 3194 memcpy(&values[i * 2], &ir->value.d[i], sizeof(double)); 3195 } 3196 break; 3197 case GLSL_TYPE_UINT: 
3198 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; 3199 for (i = 0; i < ir->type->vector_elements; i++) { 3200 if (native_integers) 3201 values[i].u = ir->value.u[i]; 3202 else 3203 values[i].f = ir->value.u[i]; 3204 } 3205 break; 3206 case GLSL_TYPE_INT: 3207 gl_type = native_integers ? GL_INT : GL_FLOAT; 3208 for (i = 0; i < ir->type->vector_elements; i++) { 3209 if (native_integers) 3210 values[i].i = ir->value.i[i]; 3211 else 3212 values[i].f = ir->value.i[i]; 3213 } 3214 break; 3215 case GLSL_TYPE_BOOL: 3216 gl_type = native_integers ? GL_BOOL : GL_FLOAT; 3217 for (i = 0; i < ir->type->vector_elements; i++) { 3218 values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0; 3219 } 3220 break; 3221 default: 3222 assert(!"Non-float/uint/int/bool constant"); 3223 } 3224 3225 this->result = st_src_reg(file, -1, ir->type); 3226 this->result.index = add_constant(file, 3227 values, 3228 ir->type->vector_elements, 3229 gl_type, 3230 &this->result.swizzle); 3231} 3232 3233void 3234glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) 3235{ 3236 exec_node *param = ir->actual_parameters.get_head(); 3237 ir_dereference *deref = static_cast<ir_dereference *>(param); 3238 ir_variable *location = deref->variable_referenced(); 3239 3240 st_src_reg buffer( 3241 PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT); 3242 3243 /* Calculate the surface offset */ 3244 st_src_reg offset; 3245 unsigned array_size = 0, base = 0; 3246 uint16_t index = 0; 3247 3248 get_deref_offsets(deref, &array_size, &base, &index, &offset, false); 3249 3250 if (offset.file != PROGRAM_UNDEFINED) { 3251 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), 3252 offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); 3253 emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), 3254 offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE)); 3255 } else { 3256 offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE); 3257 } 3258 3259 ir->return_deref->accept(this); 3260 st_dst_reg dst(this->result); 3261 dst.writemask = WRITEMASK_X; 3262 3263 glsl_to_tgsi_instruction *inst; 3264 3265 if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_read) { 3266 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset); 3267 } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_increment) { 3268 inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, 3269 st_src_reg_for_int(1)); 3270 } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_predecrement) { 3271 inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, 3272 st_src_reg_for_int(-1)); 3273 emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1)); 3274 } else { 3275 param = param->get_next(); 3276 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3277 val->accept(this); 3278 3279 st_src_reg data = this->result, data2 = undef_src; 3280 unsigned opcode; 3281 switch (ir->callee->intrinsic_id) { 3282 case ir_intrinsic_atomic_counter_add: 3283 opcode = TGSI_OPCODE_ATOMUADD; 3284 break; 3285 case ir_intrinsic_atomic_counter_min: 3286 opcode = TGSI_OPCODE_ATOMIMIN; 3287 break; 3288 case ir_intrinsic_atomic_counter_max: 3289 opcode = TGSI_OPCODE_ATOMIMAX; 3290 break; 3291 case ir_intrinsic_atomic_counter_and: 3292 opcode = TGSI_OPCODE_ATOMAND; 3293 break; 3294 case ir_intrinsic_atomic_counter_or: 3295 opcode = TGSI_OPCODE_ATOMOR; 3296 break; 3297 case ir_intrinsic_atomic_counter_xor: 3298 opcode = TGSI_OPCODE_ATOMXOR; 3299 break; 3300 case ir_intrinsic_atomic_counter_exchange: 3301 
opcode = TGSI_OPCODE_ATOMXCHG; 3302 break; 3303 case ir_intrinsic_atomic_counter_comp_swap: { 3304 opcode = TGSI_OPCODE_ATOMCAS; 3305 param = param->get_next(); 3306 val = ((ir_instruction *)param)->as_rvalue(); 3307 val->accept(this); 3308 data2 = this->result; 3309 break; 3310 } 3311 default: 3312 assert(!"Unexpected intrinsic"); 3313 return; 3314 } 3315 3316 inst = emit_asm(ir, opcode, dst, offset, data, data2); 3317 } 3318 3319 inst->resource = buffer; 3320} 3321 3322void 3323glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) 3324{ 3325 exec_node *param = ir->actual_parameters.get_head(); 3326 3327 ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); 3328 3329 param = param->get_next(); 3330 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 3331 3332 ir_constant *const_block = block->as_constant(); 3333 3334 st_src_reg buffer( 3335 PROGRAM_BUFFER, 3336 ctx->Const.Program[shader->Stage].MaxAtomicBuffers + 3337 (const_block ? const_block->value.u[0] : 0), 3338 GLSL_TYPE_UINT); 3339 3340 if (!const_block) { 3341 block->accept(this); 3342 buffer.reladdr = ralloc(mem_ctx, st_src_reg); 3343 *buffer.reladdr = this->result; 3344 emit_arl(ir, sampler_reladdr, this->result); 3345 } 3346 3347 /* Calculate the surface offset */ 3348 offset->accept(this); 3349 st_src_reg off = this->result; 3350 3351 st_dst_reg dst = undef_dst; 3352 if (ir->return_deref) { 3353 ir->return_deref->accept(this); 3354 dst = st_dst_reg(this->result); 3355 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; 3356 } 3357 3358 glsl_to_tgsi_instruction *inst; 3359 3360 if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) { 3361 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); 3362 if (dst.type == GLSL_TYPE_BOOL) 3363 emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0)); 3364 } else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) { 3365 param = param->get_next(); 3366 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3367 val->accept(this); 3368 3369 param = param->get_next(); 3370 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 3371 assert(write_mask); 3372 dst.writemask = write_mask->value.u[0]; 3373 3374 dst.type = this->result.type; 3375 inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); 3376 } else { 3377 param = param->get_next(); 3378 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3379 val->accept(this); 3380 3381 st_src_reg data = this->result, data2 = undef_src; 3382 unsigned opcode; 3383 switch (ir->callee->intrinsic_id) { 3384 case ir_intrinsic_ssbo_atomic_add: 3385 opcode = TGSI_OPCODE_ATOMUADD; 3386 break; 3387 case ir_intrinsic_ssbo_atomic_min: 3388 opcode = TGSI_OPCODE_ATOMIMIN; 3389 break; 3390 case ir_intrinsic_ssbo_atomic_max: 3391 opcode = TGSI_OPCODE_ATOMIMAX; 3392 break; 3393 case ir_intrinsic_ssbo_atomic_and: 3394 opcode = TGSI_OPCODE_ATOMAND; 3395 break; 3396 case ir_intrinsic_ssbo_atomic_or: 3397 opcode = TGSI_OPCODE_ATOMOR; 3398 break; 3399 case ir_intrinsic_ssbo_atomic_xor: 3400 opcode = TGSI_OPCODE_ATOMXOR; 3401 break; 3402 case ir_intrinsic_ssbo_atomic_exchange: 3403 opcode = TGSI_OPCODE_ATOMXCHG; 3404 break; 3405 case ir_intrinsic_ssbo_atomic_comp_swap: 3406 opcode = TGSI_OPCODE_ATOMCAS; 3407 param = param->get_next(); 3408 val = ((ir_instruction *)param)->as_rvalue(); 3409 val->accept(this); 3410 data2 = this->result; 3411 break; 3412 default: 3413 assert(!"Unexpected intrinsic"); 3414 return; 3415 } 3416 3417 inst = emit_asm(ir, opcode, dst, off, data, data2); 3418 } 3419 
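   /* An optional trailing constant parameter carries the memory access
    * qualifiers (coherent/volatile/restrict) for the operation emitted
    * above.
    */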
3420 param = param->get_next(); 3421 ir_constant *access = NULL; 3422 if (!param->is_tail_sentinel()) { 3423 access = ((ir_instruction *)param)->as_constant(); 3424 assert(access); 3425 } 3426 3427 /* The emit_asm() might have actually split the op into pieces, e.g. for 3428 * double stores. We have to go back and fix up all the generated ops. 3429 */ 3430 unsigned op = inst->op; 3431 do { 3432 inst->resource = buffer; 3433 if (access) 3434 inst->buffer_access = access->value.u[0]; 3435 3436 if (inst == this->instructions.get_head_raw()) 3437 break; 3438 inst = (glsl_to_tgsi_instruction *)inst->get_prev(); 3439 3440 if (inst->op == TGSI_OPCODE_UADD) { 3441 if (inst == this->instructions.get_head_raw()) 3442 break; 3443 inst = (glsl_to_tgsi_instruction *)inst->get_prev(); 3444 } 3445 } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED); 3446} 3447 3448void 3449glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir) 3450{ 3451 switch (ir->callee->intrinsic_id) { 3452 case ir_intrinsic_memory_barrier: 3453 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3454 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | 3455 TGSI_MEMBAR_ATOMIC_BUFFER | 3456 TGSI_MEMBAR_SHADER_IMAGE | 3457 TGSI_MEMBAR_SHARED)); 3458 break; 3459 case ir_intrinsic_memory_barrier_atomic_counter: 3460 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3461 st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER)); 3462 break; 3463 case ir_intrinsic_memory_barrier_buffer: 3464 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3465 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER)); 3466 break; 3467 case ir_intrinsic_memory_barrier_image: 3468 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3469 st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE)); 3470 break; 3471 case ir_intrinsic_memory_barrier_shared: 3472 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3473 st_src_reg_for_int(TGSI_MEMBAR_SHARED)); 3474 break; 3475 case ir_intrinsic_group_memory_barrier: 3476 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3477 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | 3478 TGSI_MEMBAR_ATOMIC_BUFFER | 3479 TGSI_MEMBAR_SHADER_IMAGE | 3480 TGSI_MEMBAR_SHARED | 3481 TGSI_MEMBAR_THREAD_GROUP)); 3482 break; 3483 default: 3484 assert(!"Unexpected memory barrier intrinsic"); 3485 } 3486} 3487 3488void 3489glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) 3490{ 3491 exec_node *param = ir->actual_parameters.get_head(); 3492 3493 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 3494 3495 st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT); 3496 3497 /* Calculate the surface offset */ 3498 offset->accept(this); 3499 st_src_reg off = this->result; 3500 3501 st_dst_reg dst = undef_dst; 3502 if (ir->return_deref) { 3503 ir->return_deref->accept(this); 3504 dst = st_dst_reg(this->result); 3505 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; 3506 } 3507 3508 glsl_to_tgsi_instruction *inst; 3509 3510 if (ir->callee->intrinsic_id == ir_intrinsic_shared_load) { 3511 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); 3512 inst->resource = buffer; 3513 } else if (ir->callee->intrinsic_id == ir_intrinsic_shared_store) { 3514 param = param->get_next(); 3515 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3516 val->accept(this); 3517 3518 param = param->get_next(); 3519 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 3520 assert(write_mask); 3521 dst.writemask = write_mask->value.u[0]; 3522 3523 dst.type = this->result.type; 3524 inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); 3525 inst->resource = buffer; 3526 } 
else { 3527 param = param->get_next(); 3528 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3529 val->accept(this); 3530 3531 st_src_reg data = this->result, data2 = undef_src; 3532 unsigned opcode; 3533 switch (ir->callee->intrinsic_id) { 3534 case ir_intrinsic_shared_atomic_add: 3535 opcode = TGSI_OPCODE_ATOMUADD; 3536 break; 3537 case ir_intrinsic_shared_atomic_min: 3538 opcode = TGSI_OPCODE_ATOMIMIN; 3539 break; 3540 case ir_intrinsic_shared_atomic_max: 3541 opcode = TGSI_OPCODE_ATOMIMAX; 3542 break; 3543 case ir_intrinsic_shared_atomic_and: 3544 opcode = TGSI_OPCODE_ATOMAND; 3545 break; 3546 case ir_intrinsic_shared_atomic_or: 3547 opcode = TGSI_OPCODE_ATOMOR; 3548 break; 3549 case ir_intrinsic_shared_atomic_xor: 3550 opcode = TGSI_OPCODE_ATOMXOR; 3551 break; 3552 case ir_intrinsic_shared_atomic_exchange: 3553 opcode = TGSI_OPCODE_ATOMXCHG; 3554 break; 3555 case ir_intrinsic_shared_atomic_comp_swap: 3556 opcode = TGSI_OPCODE_ATOMCAS; 3557 param = param->get_next(); 3558 val = ((ir_instruction *)param)->as_rvalue(); 3559 val->accept(this); 3560 data2 = this->result; 3561 break; 3562 default: 3563 assert(!"Unexpected intrinsic"); 3564 return; 3565 } 3566 3567 inst = emit_asm(ir, opcode, dst, off, data, data2); 3568 inst->resource = buffer; 3569 } 3570} 3571 3572void 3573glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) 3574{ 3575 exec_node *param = ir->actual_parameters.get_head(); 3576 3577 ir_dereference *img = (ir_dereference *)param; 3578 const ir_variable *imgvar = img->variable_referenced(); 3579 const glsl_type *type = imgvar->type->without_array(); 3580 unsigned sampler_array_size = 1, sampler_base = 0; 3581 3582 st_src_reg reladdr; 3583 st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); 3584 3585 get_deref_offsets(img, &sampler_array_size, &sampler_base, 3586 (uint16_t*)&image.index, &reladdr, true); 3587 3588 if (reladdr.file != PROGRAM_UNDEFINED) { 3589 image.reladdr = ralloc(mem_ctx, st_src_reg); 3590 *image.reladdr = reladdr; 3591 emit_arl(ir, sampler_reladdr, reladdr); 3592 } 3593 3594 st_dst_reg dst = undef_dst; 3595 if (ir->return_deref) { 3596 ir->return_deref->accept(this); 3597 dst = st_dst_reg(this->result); 3598 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; 3599 } 3600 3601 glsl_to_tgsi_instruction *inst; 3602 3603 if (ir->callee->intrinsic_id == ir_intrinsic_image_size) { 3604 dst.writemask = WRITEMASK_XYZ; 3605 inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst); 3606 } else if (ir->callee->intrinsic_id == ir_intrinsic_image_samples) { 3607 st_src_reg res = get_temp(glsl_type::ivec4_type); 3608 st_dst_reg dstres = st_dst_reg(res); 3609 dstres.writemask = WRITEMASK_W; 3610 inst = emit_asm(ir, TGSI_OPCODE_RESQ, dstres); 3611 res.swizzle = SWIZZLE_WWWW; 3612 emit_asm(ir, TGSI_OPCODE_MOV, dst, res); 3613 } else { 3614 st_src_reg arg1 = undef_src, arg2 = undef_src; 3615 st_src_reg coord; 3616 st_dst_reg coord_dst; 3617 coord = get_temp(glsl_type::ivec4_type); 3618 coord_dst = st_dst_reg(coord); 3619 coord_dst.writemask = (1 << type->coordinate_components()) - 1; 3620 param = param->get_next(); 3621 ((ir_dereference *)param)->accept(this); 3622 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 3623 coord.swizzle = SWIZZLE_XXXX; 3624 switch (type->coordinate_components()) { 3625 case 4: assert(!"unexpected coord count"); 3626 /* fallthrough */ 3627 case 3: coord.swizzle |= SWIZZLE_Z << 6; 3628 /* fallthrough */ 3629 case 2: coord.swizzle |= SWIZZLE_Y << 3; 3630 } 3631 3632 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { 3633 
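      /* For multisample images the sample index is passed as an extra
       * parameter; it goes into the W channel of the coordinate.
       */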
param = param->get_next(); 3634 ((ir_dereference *)param)->accept(this); 3635 st_src_reg sample = this->result; 3636 sample.swizzle = SWIZZLE_XXXX; 3637 coord_dst.writemask = WRITEMASK_W; 3638 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample); 3639 coord.swizzle |= SWIZZLE_W << 9; 3640 } 3641 3642 param = param->get_next(); 3643 if (!param->is_tail_sentinel()) { 3644 ((ir_dereference *)param)->accept(this); 3645 arg1 = this->result; 3646 param = param->get_next(); 3647 } 3648 3649 if (!param->is_tail_sentinel()) { 3650 ((ir_dereference *)param)->accept(this); 3651 arg2 = this->result; 3652 param = param->get_next(); 3653 } 3654 3655 assert(param->is_tail_sentinel()); 3656 3657 unsigned opcode; 3658 switch (ir->callee->intrinsic_id) { 3659 case ir_intrinsic_image_load: 3660 opcode = TGSI_OPCODE_LOAD; 3661 break; 3662 case ir_intrinsic_image_store: 3663 opcode = TGSI_OPCODE_STORE; 3664 break; 3665 case ir_intrinsic_image_atomic_add: 3666 opcode = TGSI_OPCODE_ATOMUADD; 3667 break; 3668 case ir_intrinsic_image_atomic_min: 3669 opcode = TGSI_OPCODE_ATOMIMIN; 3670 break; 3671 case ir_intrinsic_image_atomic_max: 3672 opcode = TGSI_OPCODE_ATOMIMAX; 3673 break; 3674 case ir_intrinsic_image_atomic_and: 3675 opcode = TGSI_OPCODE_ATOMAND; 3676 break; 3677 case ir_intrinsic_image_atomic_or: 3678 opcode = TGSI_OPCODE_ATOMOR; 3679 break; 3680 case ir_intrinsic_image_atomic_xor: 3681 opcode = TGSI_OPCODE_ATOMXOR; 3682 break; 3683 case ir_intrinsic_image_atomic_exchange: 3684 opcode = TGSI_OPCODE_ATOMXCHG; 3685 break; 3686 case ir_intrinsic_image_atomic_comp_swap: 3687 opcode = TGSI_OPCODE_ATOMCAS; 3688 break; 3689 default: 3690 assert(!"Unexpected intrinsic"); 3691 return; 3692 } 3693 3694 inst = emit_asm(ir, opcode, dst, coord, arg1, arg2); 3695 if (opcode == TGSI_OPCODE_STORE) 3696 inst->dst[0].writemask = WRITEMASK_XYZW; 3697 } 3698 3699 inst->resource = image; 3700 inst->sampler_array_size = sampler_array_size; 3701 inst->sampler_base = sampler_base; 3702 3703 switch (type->sampler_dimensionality) { 3704 case GLSL_SAMPLER_DIM_1D: 3705 inst->tex_target = (type->sampler_array) 3706 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 3707 break; 3708 case GLSL_SAMPLER_DIM_2D: 3709 inst->tex_target = (type->sampler_array) 3710 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 3711 break; 3712 case GLSL_SAMPLER_DIM_3D: 3713 inst->tex_target = TEXTURE_3D_INDEX; 3714 break; 3715 case GLSL_SAMPLER_DIM_CUBE: 3716 inst->tex_target = (type->sampler_array) 3717 ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; 3718 break; 3719 case GLSL_SAMPLER_DIM_RECT: 3720 inst->tex_target = TEXTURE_RECT_INDEX; 3721 break; 3722 case GLSL_SAMPLER_DIM_BUF: 3723 inst->tex_target = TEXTURE_BUFFER_INDEX; 3724 break; 3725 case GLSL_SAMPLER_DIM_EXTERNAL: 3726 inst->tex_target = TEXTURE_EXTERNAL_INDEX; 3727 break; 3728 case GLSL_SAMPLER_DIM_MS: 3729 inst->tex_target = (type->sampler_array) 3730 ? 
TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; 3731 break; 3732 default: 3733 assert(!"Should not get here."); 3734 } 3735 3736 inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx), 3737 _mesa_get_shader_image_format(imgvar->data.image_format)); 3738 3739 if (imgvar->data.image_coherent) 3740 inst->buffer_access |= TGSI_MEMORY_COHERENT; 3741 if (imgvar->data.image_restrict) 3742 inst->buffer_access |= TGSI_MEMORY_RESTRICT; 3743 if (imgvar->data.image_volatile) 3744 inst->buffer_access |= TGSI_MEMORY_VOLATILE; 3745} 3746 3747void 3748glsl_to_tgsi_visitor::visit(ir_call *ir) 3749{ 3750 ir_function_signature *sig = ir->callee; 3751 3752 /* Filter out intrinsics */ 3753 switch (sig->intrinsic_id) { 3754 case ir_intrinsic_atomic_counter_read: 3755 case ir_intrinsic_atomic_counter_increment: 3756 case ir_intrinsic_atomic_counter_predecrement: 3757 case ir_intrinsic_atomic_counter_add: 3758 case ir_intrinsic_atomic_counter_min: 3759 case ir_intrinsic_atomic_counter_max: 3760 case ir_intrinsic_atomic_counter_and: 3761 case ir_intrinsic_atomic_counter_or: 3762 case ir_intrinsic_atomic_counter_xor: 3763 case ir_intrinsic_atomic_counter_exchange: 3764 case ir_intrinsic_atomic_counter_comp_swap: 3765 visit_atomic_counter_intrinsic(ir); 3766 return; 3767 3768 case ir_intrinsic_ssbo_load: 3769 case ir_intrinsic_ssbo_store: 3770 case ir_intrinsic_ssbo_atomic_add: 3771 case ir_intrinsic_ssbo_atomic_min: 3772 case ir_intrinsic_ssbo_atomic_max: 3773 case ir_intrinsic_ssbo_atomic_and: 3774 case ir_intrinsic_ssbo_atomic_or: 3775 case ir_intrinsic_ssbo_atomic_xor: 3776 case ir_intrinsic_ssbo_atomic_exchange: 3777 case ir_intrinsic_ssbo_atomic_comp_swap: 3778 visit_ssbo_intrinsic(ir); 3779 return; 3780 3781 case ir_intrinsic_memory_barrier: 3782 case ir_intrinsic_memory_barrier_atomic_counter: 3783 case ir_intrinsic_memory_barrier_buffer: 3784 case ir_intrinsic_memory_barrier_image: 3785 case ir_intrinsic_memory_barrier_shared: 3786 case ir_intrinsic_group_memory_barrier: 3787 visit_membar_intrinsic(ir); 3788 return; 3789 3790 case ir_intrinsic_shared_load: 3791 case ir_intrinsic_shared_store: 3792 case ir_intrinsic_shared_atomic_add: 3793 case ir_intrinsic_shared_atomic_min: 3794 case ir_intrinsic_shared_atomic_max: 3795 case ir_intrinsic_shared_atomic_and: 3796 case ir_intrinsic_shared_atomic_or: 3797 case ir_intrinsic_shared_atomic_xor: 3798 case ir_intrinsic_shared_atomic_exchange: 3799 case ir_intrinsic_shared_atomic_comp_swap: 3800 visit_shared_intrinsic(ir); 3801 return; 3802 3803 case ir_intrinsic_image_load: 3804 case ir_intrinsic_image_store: 3805 case ir_intrinsic_image_atomic_add: 3806 case ir_intrinsic_image_atomic_min: 3807 case ir_intrinsic_image_atomic_max: 3808 case ir_intrinsic_image_atomic_and: 3809 case ir_intrinsic_image_atomic_or: 3810 case ir_intrinsic_image_atomic_xor: 3811 case ir_intrinsic_image_atomic_exchange: 3812 case ir_intrinsic_image_atomic_comp_swap: 3813 case ir_intrinsic_image_size: 3814 case ir_intrinsic_image_samples: 3815 visit_image_intrinsic(ir); 3816 return; 3817 3818 case ir_intrinsic_invalid: 3819 case ir_intrinsic_generic_load: 3820 case ir_intrinsic_generic_store: 3821 case ir_intrinsic_generic_atomic_add: 3822 case ir_intrinsic_generic_atomic_and: 3823 case ir_intrinsic_generic_atomic_or: 3824 case ir_intrinsic_generic_atomic_xor: 3825 case ir_intrinsic_generic_atomic_min: 3826 case ir_intrinsic_generic_atomic_max: 3827 case ir_intrinsic_generic_atomic_exchange: 3828 case ir_intrinsic_generic_atomic_comp_swap: 3829 case 
ir_intrinsic_shader_clock: 3830 unreachable("Invalid intrinsic"); 3831 } 3832} 3833 3834void 3835glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *tail, 3836 unsigned *array_elements, 3837 uint16_t *index, 3838 st_src_reg *indirect, 3839 unsigned *location) 3840{ 3841 switch (tail->ir_type) { 3842 case ir_type_dereference_record: { 3843 ir_dereference_record *deref_record = tail->as_dereference_record(); 3844 const glsl_type *struct_type = deref_record->record->type; 3845 int field_index = deref_record->record->type->field_index(deref_record->field); 3846 3847 calc_deref_offsets(deref_record->record->as_dereference(), array_elements, index, indirect, location); 3848 3849 assert(field_index >= 0); 3850 *location += struct_type->record_location_offset(field_index); 3851 break; 3852 } 3853 3854 case ir_type_dereference_array: { 3855 ir_dereference_array *deref_arr = tail->as_dereference_array(); 3856 ir_constant *array_index = deref_arr->array_index->constant_expression_value(); 3857 3858 if (!array_index) { 3859 st_src_reg temp_reg; 3860 st_dst_reg temp_dst; 3861 3862 temp_reg = get_temp(glsl_type::uint_type); 3863 temp_dst = st_dst_reg(temp_reg); 3864 temp_dst.writemask = 1; 3865 3866 deref_arr->array_index->accept(this); 3867 if (*array_elements != 1) 3868 emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements)); 3869 else 3870 emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result); 3871 3872 if (indirect->file == PROGRAM_UNDEFINED) 3873 *indirect = temp_reg; 3874 else { 3875 temp_dst = st_dst_reg(*indirect); 3876 temp_dst.writemask = 1; 3877 emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg); 3878 } 3879 } else 3880 *index += array_index->value.u[0] * *array_elements; 3881 3882 *array_elements *= deref_arr->array->type->length; 3883 3884 calc_deref_offsets(deref_arr->array->as_dereference(), array_elements, index, indirect, location); 3885 break; 3886 } 3887 default: 3888 break; 3889 } 3890} 3891 3892void 3893glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, 3894 unsigned *array_size, 3895 unsigned *base, 3896 uint16_t *index, 3897 st_src_reg *reladdr, 3898 bool opaque) 3899{ 3900 GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target); 3901 unsigned location = 0; 3902 ir_variable *var = ir->variable_referenced(); 3903 3904 memset(reladdr, 0, sizeof(*reladdr)); 3905 reladdr->file = PROGRAM_UNDEFINED; 3906 3907 *base = 0; 3908 *array_size = 1; 3909 3910 assert(var); 3911 location = var->data.location; 3912 calc_deref_offsets(ir, array_size, index, reladdr, &location); 3913 3914 /* 3915 * If we end up with no indirect then adjust the base to the index, 3916 * and set the array size to 1. 
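    * For example (values illustrative): a constant dereference such as
    * sampler_array[2] comes back from calc_deref_offsets() with
    * index == 2 and reladdr still PROGRAM_UNDEFINED, so the block below
    * folds it into base = 2 with an array_size of 1.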
3917 */ 3918 if (reladdr->file == PROGRAM_UNDEFINED) { 3919 *base = *index; 3920 *array_size = 1; 3921 } 3922 3923 if (opaque) { 3924 assert(location != 0xffffffff); 3925 *base += this->shader_program->data->UniformStorage[location].opaque[shader].index; 3926 *index += this->shader_program->data->UniformStorage[location].opaque[shader].index; 3927 } 3928} 3929 3930st_src_reg 3931glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset) 3932{ 3933 if (offset.reladdr || offset.reladdr2) { 3934 st_src_reg tmp = get_temp(glsl_type::ivec2_type); 3935 st_dst_reg tmp_dst = st_dst_reg(tmp); 3936 tmp_dst.writemask = WRITEMASK_XY; 3937 emit_asm(NULL, TGSI_OPCODE_MOV, tmp_dst, offset); 3938 return tmp; 3939 } 3940 3941 return offset; 3942} 3943 3944void 3945glsl_to_tgsi_visitor::visit(ir_texture *ir) 3946{ 3947 st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy; 3948 st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component; 3949 st_src_reg levels_src, reladdr; 3950 st_dst_reg result_dst, coord_dst, cube_sc_dst; 3951 glsl_to_tgsi_instruction *inst = NULL; 3952 unsigned opcode = TGSI_OPCODE_NOP; 3953 const glsl_type *sampler_type = ir->sampler->type; 3954 unsigned sampler_array_size = 1, sampler_base = 0; 3955 uint16_t sampler_index = 0; 3956 bool is_cube_array = false; 3957 unsigned i; 3958 3959 /* if we are a cube array sampler */ 3960 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && 3961 sampler_type->sampler_array)) { 3962 is_cube_array = true; 3963 } 3964 3965 if (ir->coordinate) { 3966 ir->coordinate->accept(this); 3967 3968 /* Put our coords in a temp. We'll need to modify them for shadow, 3969 * projection, or LOD, so the only case we'd use it as-is is if 3970 * we're doing plain old texturing. The optimization passes on 3971 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 3972 */ 3973 coord = get_temp(glsl_type::vec4_type); 3974 coord_dst = st_dst_reg(coord); 3975 coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1; 3976 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 3977 } 3978 3979 if (ir->projector) { 3980 ir->projector->accept(this); 3981 projector = this->result; 3982 } 3983 3984 /* Storage for our result. Ideally for an assignment we'd be using 3985 * the actual storage for the result here, instead. 3986 */ 3987 result_src = get_temp(ir->type); 3988 result_dst = st_dst_reg(result_src); 3989 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 3990 3991 switch (ir->op) { 3992 case ir_tex: 3993 opcode = (is_cube_array && ir->shadow_comparator) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; 3994 if (ir->offset) { 3995 ir->offset->accept(this); 3996 offset[0] = this->result; 3997 } 3998 break; 3999 case ir_txb: 4000 if (is_cube_array || 4001 sampler_type == glsl_type::samplerCubeShadow_type) { 4002 opcode = TGSI_OPCODE_TXB2; 4003 } 4004 else { 4005 opcode = TGSI_OPCODE_TXB; 4006 } 4007 ir->lod_info.bias->accept(this); 4008 lod_info = this->result; 4009 if (ir->offset) { 4010 ir->offset->accept(this); 4011 offset[0] = this->result; 4012 } 4013 break; 4014 case ir_txl: 4015 opcode = is_cube_array ? 
TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; 4016 ir->lod_info.lod->accept(this); 4017 lod_info = this->result; 4018 if (ir->offset) { 4019 ir->offset->accept(this); 4020 offset[0] = this->result; 4021 } 4022 break; 4023 case ir_txd: 4024 opcode = TGSI_OPCODE_TXD; 4025 ir->lod_info.grad.dPdx->accept(this); 4026 dx = this->result; 4027 ir->lod_info.grad.dPdy->accept(this); 4028 dy = this->result; 4029 if (ir->offset) { 4030 ir->offset->accept(this); 4031 offset[0] = this->result; 4032 } 4033 break; 4034 case ir_txs: 4035 opcode = TGSI_OPCODE_TXQ; 4036 ir->lod_info.lod->accept(this); 4037 lod_info = this->result; 4038 break; 4039 case ir_query_levels: 4040 opcode = TGSI_OPCODE_TXQ; 4041 lod_info = undef_src; 4042 levels_src = get_temp(ir->type); 4043 break; 4044 case ir_txf: 4045 opcode = TGSI_OPCODE_TXF; 4046 ir->lod_info.lod->accept(this); 4047 lod_info = this->result; 4048 if (ir->offset) { 4049 ir->offset->accept(this); 4050 offset[0] = this->result; 4051 } 4052 break; 4053 case ir_txf_ms: 4054 opcode = TGSI_OPCODE_TXF; 4055 ir->lod_info.sample_index->accept(this); 4056 sample_index = this->result; 4057 break; 4058 case ir_tg4: 4059 opcode = TGSI_OPCODE_TG4; 4060 ir->lod_info.component->accept(this); 4061 component = this->result; 4062 if (ir->offset) { 4063 ir->offset->accept(this); 4064 if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) { 4065 const glsl_type *elt_type = ir->offset->type->fields.array; 4066 for (i = 0; i < ir->offset->type->length; i++) { 4067 offset[i] = this->result; 4068 offset[i].index += i * type_size(elt_type); 4069 offset[i].type = elt_type->base_type; 4070 offset[i].swizzle = swizzle_for_size(elt_type->vector_elements); 4071 offset[i] = canonicalize_gather_offset(offset[i]); 4072 } 4073 } else { 4074 offset[0] = canonicalize_gather_offset(this->result); 4075 } 4076 } 4077 break; 4078 case ir_lod: 4079 opcode = TGSI_OPCODE_LODQ; 4080 break; 4081 case ir_texture_samples: 4082 opcode = TGSI_OPCODE_TXQS; 4083 break; 4084 case ir_samples_identical: 4085 unreachable("Unexpected ir_samples_identical opcode"); 4086 } 4087 4088 if (ir->projector) { 4089 if (opcode == TGSI_OPCODE_TEX) { 4090 /* Slot the projector in as the last component of the coord. */ 4091 coord_dst.writemask = WRITEMASK_W; 4092 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector); 4093 coord_dst.writemask = WRITEMASK_XYZW; 4094 opcode = TGSI_OPCODE_TXP; 4095 } else { 4096 st_src_reg coord_w = coord; 4097 coord_w.swizzle = SWIZZLE_WWWW; 4098 4099 /* For the other TEX opcodes there's no projective version 4100 * since the last slot is taken up by LOD info. Do the 4101 * projective divide now. 4102 */ 4103 coord_dst.writemask = WRITEMASK_W; 4104 emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector); 4105 4106 /* In the case where we have to project the coordinates "by hand," 4107 * the shadow comparator value must also be projected. 4108 */ 4109 st_src_reg tmp_src = coord; 4110 if (ir->shadow_comparator) { 4111 /* Slot the shadow value in as the second to last component of the 4112 * coord. 4113 */ 4114 ir->shadow_comparator->accept(this); 4115 4116 tmp_src = get_temp(glsl_type::vec4_type); 4117 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 4118 4119 /* Projective division not allowed for array samplers. 
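          * (The by-hand projection multiplies coord.xyz by 1/projector:
          * the RCP above writes the reciprocal into coord.w and the MUL
          * below applies it to the coordinate, and to the shadow
          * comparator when one is present.)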
*/ 4120 assert(!sampler_type->sampler_array); 4121 4122 tmp_dst.writemask = WRITEMASK_Z; 4123 emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 4124 4125 tmp_dst.writemask = WRITEMASK_XY; 4126 emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 4127 } 4128 4129 coord_dst.writemask = WRITEMASK_XYZ; 4130 emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 4131 4132 coord_dst.writemask = WRITEMASK_XYZW; 4133 coord.swizzle = SWIZZLE_XYZW; 4134 } 4135 } 4136 4137 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 4138 * comparator was put in the correct place (and projected) by the code, 4139 * above, that handles by-hand projection. 4140 */ 4141 if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 4142 /* Slot the shadow value in as the second to last component of the 4143 * coord. 4144 */ 4145 ir->shadow_comparator->accept(this); 4146 4147 if (is_cube_array) { 4148 cube_sc = get_temp(glsl_type::float_type); 4149 cube_sc_dst = st_dst_reg(cube_sc); 4150 cube_sc_dst.writemask = WRITEMASK_X; 4151 emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); 4152 cube_sc_dst.writemask = WRITEMASK_X; 4153 } 4154 else { 4155 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 4156 sampler_type->sampler_array) || 4157 sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { 4158 coord_dst.writemask = WRITEMASK_W; 4159 } else { 4160 coord_dst.writemask = WRITEMASK_Z; 4161 } 4162 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 4163 coord_dst.writemask = WRITEMASK_XYZW; 4164 } 4165 } 4166 4167 if (ir->op == ir_txf_ms) { 4168 coord_dst.writemask = WRITEMASK_W; 4169 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index); 4170 coord_dst.writemask = WRITEMASK_XYZW; 4171 } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || 4172 opcode == TGSI_OPCODE_TXF) { 4173 /* TGSI stores LOD or LOD bias in the last channel of the coords. 
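       * E.g. (illustrative) for a 2D texture the sequence emitted here
       * looks like:
       *
       *   MOV TEMP[0].w, <lod or bias>;
       *   TXL TEMP[1], TEMP[0], SAMP[0], 2D;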
*/ 4174 coord_dst.writemask = WRITEMASK_W; 4175 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 4176 coord_dst.writemask = WRITEMASK_XYZW; 4177 } 4178 4179 get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base, 4180 &sampler_index, &reladdr, true); 4181 if (reladdr.file != PROGRAM_UNDEFINED) 4182 emit_arl(ir, sampler_reladdr, reladdr); 4183 4184 if (opcode == TGSI_OPCODE_TXD) 4185 inst = emit_asm(ir, opcode, result_dst, coord, dx, dy); 4186 else if (opcode == TGSI_OPCODE_TXQ) { 4187 if (ir->op == ir_query_levels) { 4188 /* the level is stored in W */ 4189 inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info); 4190 result_dst.writemask = WRITEMASK_X; 4191 levels_src.swizzle = SWIZZLE_WWWW; 4192 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src); 4193 } else 4194 inst = emit_asm(ir, opcode, result_dst, lod_info); 4195 } else if (opcode == TGSI_OPCODE_TXQS) { 4196 inst = emit_asm(ir, opcode, result_dst); 4197 } else if (opcode == TGSI_OPCODE_TXF) { 4198 inst = emit_asm(ir, opcode, result_dst, coord); 4199 } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) { 4200 inst = emit_asm(ir, opcode, result_dst, coord, lod_info); 4201 } else if (opcode == TGSI_OPCODE_TEX2) { 4202 inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); 4203 } else if (opcode == TGSI_OPCODE_TG4) { 4204 if (is_cube_array && ir->shadow_comparator) { 4205 inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); 4206 } else { 4207 inst = emit_asm(ir, opcode, result_dst, coord, component); 4208 } 4209 } else 4210 inst = emit_asm(ir, opcode, result_dst, coord); 4211 4212 if (ir->shadow_comparator) 4213 inst->tex_shadow = GL_TRUE; 4214 4215 inst->resource.index = sampler_index; 4216 inst->sampler_array_size = sampler_array_size; 4217 inst->sampler_base = sampler_base; 4218 4219 if (reladdr.file != PROGRAM_UNDEFINED) { 4220 inst->resource.reladdr = ralloc(mem_ctx, st_src_reg); 4221 memcpy(inst->resource.reladdr, &reladdr, sizeof(reladdr)); 4222 } 4223 4224 if (ir->offset) { 4225 if (!inst->tex_offsets) 4226 inst->tex_offsets = rzalloc_array(inst, st_src_reg, MAX_GLSL_TEXTURE_OFFSET); 4227 4228 for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++) 4229 inst->tex_offsets[i] = offset[i]; 4230 inst->tex_offset_num_offset = i; 4231 } 4232 4233 switch (sampler_type->sampler_dimensionality) { 4234 case GLSL_SAMPLER_DIM_1D: 4235 inst->tex_target = (sampler_type->sampler_array) 4236 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 4237 break; 4238 case GLSL_SAMPLER_DIM_2D: 4239 inst->tex_target = (sampler_type->sampler_array) 4240 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 4241 break; 4242 case GLSL_SAMPLER_DIM_3D: 4243 inst->tex_target = TEXTURE_3D_INDEX; 4244 break; 4245 case GLSL_SAMPLER_DIM_CUBE: 4246 inst->tex_target = (sampler_type->sampler_array) 4247 ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; 4248 break; 4249 case GLSL_SAMPLER_DIM_RECT: 4250 inst->tex_target = TEXTURE_RECT_INDEX; 4251 break; 4252 case GLSL_SAMPLER_DIM_BUF: 4253 inst->tex_target = TEXTURE_BUFFER_INDEX; 4254 break; 4255 case GLSL_SAMPLER_DIM_EXTERNAL: 4256 inst->tex_target = TEXTURE_EXTERNAL_INDEX; 4257 break; 4258 case GLSL_SAMPLER_DIM_MS: 4259 inst->tex_target = (sampler_type->sampler_array) 4260 ? 
TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; 4261 break; 4262 default: 4263 assert(!"Should not get here."); 4264 } 4265 4266 inst->tex_type = ir->type->base_type; 4267 4268 this->result = result_src; 4269} 4270 4271void 4272glsl_to_tgsi_visitor::visit(ir_return *ir) 4273{ 4274 assert(!ir->get_value()); 4275 4276 emit_asm(ir, TGSI_OPCODE_RET); 4277} 4278 4279void 4280glsl_to_tgsi_visitor::visit(ir_discard *ir) 4281{ 4282 if (ir->condition) { 4283 ir->condition->accept(this); 4284 st_src_reg condition = this->result; 4285 4286 /* Convert the bool condition to a float so we can negate. */ 4287 if (native_integers) { 4288 st_src_reg temp = get_temp(ir->condition->type); 4289 emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp), 4290 condition, st_src_reg_for_float(1.0)); 4291 condition = temp; 4292 } 4293 4294 condition.negate = ~condition.negate; 4295 emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition); 4296 } else { 4297 /* unconditional kill */ 4298 emit_asm(ir, TGSI_OPCODE_KILL); 4299 } 4300} 4301 4302void 4303glsl_to_tgsi_visitor::visit(ir_if *ir) 4304{ 4305 unsigned if_opcode; 4306 glsl_to_tgsi_instruction *if_inst; 4307 4308 ir->condition->accept(this); 4309 assert(this->result.file != PROGRAM_UNDEFINED); 4310 4311 if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF; 4312 4313 if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result); 4314 4315 this->instructions.push_tail(if_inst); 4316 4317 visit_exec_list(&ir->then_instructions, this); 4318 4319 if (!ir->else_instructions.is_empty()) { 4320 emit_asm(ir->condition, TGSI_OPCODE_ELSE); 4321 visit_exec_list(&ir->else_instructions, this); 4322 } 4323 4324 if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF); 4325} 4326 4327 4328void 4329glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir) 4330{ 4331 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); 4332 4333 ir->stream->accept(this); 4334 emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result); 4335} 4336 4337void 4338glsl_to_tgsi_visitor::visit(ir_end_primitive *ir) 4339{ 4340 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); 4341 4342 ir->stream->accept(this); 4343 emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result); 4344} 4345 4346void 4347glsl_to_tgsi_visitor::visit(ir_barrier *ir) 4348{ 4349 assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV || 4350 this->prog->Target == GL_COMPUTE_PROGRAM_NV); 4351 4352 emit_asm(ir, TGSI_OPCODE_BARRIER); 4353} 4354 4355glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 4356{ 4357 STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS); 4358 4359 result.file = PROGRAM_UNDEFINED; 4360 next_temp = 1; 4361 array_sizes = NULL; 4362 max_num_arrays = 0; 4363 next_array = 0; 4364 num_inputs = 0; 4365 num_outputs = 0; 4366 num_input_arrays = 0; 4367 num_output_arrays = 0; 4368 num_immediates = 0; 4369 num_address_regs = 0; 4370 samplers_used = 0; 4371 buffers_used = 0; 4372 images_used = 0; 4373 indirect_addr_consts = false; 4374 wpos_transform_const = -1; 4375 glsl_version = 0; 4376 native_integers = false; 4377 mem_ctx = ralloc_context(NULL); 4378 ctx = NULL; 4379 prog = NULL; 4380 shader_program = NULL; 4381 shader = NULL; 4382 options = NULL; 4383 have_sqrt = false; 4384 have_fma = false; 4385 use_shared_memory = false; 4386} 4387 4388glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 4389{ 4390 free(array_sizes); 4391 ralloc_free(mem_ctx); 4392} 4393 4394extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 4395{ 4396 delete v; 4397} 4398 4399 4400/** 4401 * Count resources used by the
given gpu program (number of texture 4402 * samplers, etc). 4403 */ 4404static void 4405count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 4406{ 4407 v->samplers_used = 0; 4408 v->buffers_used = 0; 4409 v->images_used = 0; 4410 4411 foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { 4412 if (inst->info->is_tex) { 4413 for (int i = 0; i < inst->sampler_array_size; i++) { 4414 unsigned idx = inst->sampler_base + i; 4415 v->samplers_used |= 1u << idx; 4416 4417 debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types)); 4418 v->sampler_types[idx] = inst->tex_type; 4419 v->sampler_targets[idx] = 4420 st_translate_texture_target(inst->tex_target, inst->tex_shadow); 4421 4422 if (inst->tex_shadow) { 4423 prog->ShadowSamplers |= 1 << (inst->resource.index + i); 4424 } 4425 } 4426 } 4427 4428 if (inst->tex_target == TEXTURE_EXTERNAL_INDEX) 4429 prog->ExternalSamplersUsed |= 1 << inst->resource.index; 4430 4431 if (inst->resource.file != PROGRAM_UNDEFINED && ( 4432 is_resource_instruction(inst->op) || 4433 inst->op == TGSI_OPCODE_STORE)) { 4434 if (inst->resource.file == PROGRAM_BUFFER) { 4435 v->buffers_used |= 1 << inst->resource.index; 4436 } else if (inst->resource.file == PROGRAM_MEMORY) { 4437 v->use_shared_memory = true; 4438 } else { 4439 assert(inst->resource.file == PROGRAM_IMAGE); 4440 for (int i = 0; i < inst->sampler_array_size; i++) { 4441 unsigned idx = inst->sampler_base + i; 4442 v->images_used |= 1 << idx; 4443 v->image_targets[idx] = 4444 st_translate_texture_target(inst->tex_target, false); 4445 v->image_formats[idx] = inst->image_format; 4446 } 4447 } 4448 } 4449 } 4450 prog->SamplersUsed = v->samplers_used; 4451 4452 if (v->shader_program != NULL) 4453 _mesa_update_shader_textures_used(v->shader_program, prog); 4454} 4455 4456/** 4457 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 4458 * are read from the given src in this instruction 4459 */ 4460static int 4461get_src_arg_mask(st_dst_reg dst, st_src_reg src) 4462{ 4463 int read_mask = 0, comp; 4464 4465 /* Now, given the src swizzle and the written channels, find which 4466 * components are actually read 4467 */ 4468 for (comp = 0; comp < 4; ++comp) { 4469 const unsigned coord = GET_SWZ(src.swizzle, comp); 4470 assert(coord < 4); 4471 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 4472 read_mask |= 1 << coord; 4473 } 4474 4475 return read_mask; 4476} 4477 4478/** 4479 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 4480 * instruction is the first instruction to write to register T0. There are 4481 * several lowering passes done in GLSL IR (e.g. branches and 4482 * relative addressing) that create a large number of conditional assignments 4483 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 4484 * 4485 * Here is why this conversion is safe: 4486 * CMP T0, T1 T2 T0 can be expanded to: 4487 * if (T1 < 0.0) 4488 * MOV T0, T2; 4489 * else 4490 * MOV T0, T0; 4491 * 4492 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 4493 * as the original program. If (T1 < 0.0) evaluates to false, executing 4494 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 4495 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 4496 * because any instruction that was going to read from T0 after this was going 4497 * to read a garbage value anyway. 
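 * A concrete instance (register numbers illustrative):
 *
 *   0: CMP T0, T1, T2, T0    <- first write to T0
 *
 * becomes
 *
 *   0: MOV T0, T2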
4498 */ 4499void 4500glsl_to_tgsi_visitor::simplify_cmp(void) 4501{ 4502 int tempWritesSize = 0; 4503 unsigned *tempWrites = NULL; 4504 unsigned outputWrites[VARYING_SLOT_TESS_MAX]; 4505 4506 memset(outputWrites, 0, sizeof(outputWrites)); 4507 4508 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4509 unsigned prevWriteMask = 0; 4510 4511 /* Give up if we encounter relative addressing or flow control. */ 4512 if (inst->dst[0].reladdr || inst->dst[0].reladdr2 || 4513 inst->dst[1].reladdr || inst->dst[1].reladdr2 || 4514 tgsi_get_opcode_info(inst->op)->is_branch || 4515 inst->op == TGSI_OPCODE_CONT || 4516 inst->op == TGSI_OPCODE_END || 4517 inst->op == TGSI_OPCODE_RET) { 4518 break; 4519 } 4520 4521 if (inst->dst[0].file == PROGRAM_OUTPUT) { 4522 assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites)); 4523 prevWriteMask = outputWrites[inst->dst[0].index]; 4524 outputWrites[inst->dst[0].index] |= inst->dst[0].writemask; 4525 } else if (inst->dst[0].file == PROGRAM_TEMPORARY) { 4526 if (inst->dst[0].index >= tempWritesSize) { 4527 const int inc = 4096; 4528 4529 tempWrites = (unsigned*) 4530 realloc(tempWrites, 4531 (tempWritesSize + inc) * sizeof(unsigned)); 4532 if (!tempWrites) 4533 return; 4534 4535 memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned)); 4536 tempWritesSize += inc; 4537 } 4538 4539 prevWriteMask = tempWrites[inst->dst[0].index]; 4540 tempWrites[inst->dst[0].index] |= inst->dst[0].writemask; 4541 } else 4542 continue; 4543 4544 /* For a CMP to be considered a conditional write, the destination 4545 * register and source register two must be the same. */ 4546 if (inst->op == TGSI_OPCODE_CMP 4547 && !(inst->dst[0].writemask & prevWriteMask) 4548 && inst->src[2].file == inst->dst[0].file 4549 && inst->src[2].index == inst->dst[0].index 4550 && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) { 4551 4552 inst->op = TGSI_OPCODE_MOV; 4553 inst->info = tgsi_get_opcode_info(inst->op); 4554 inst->src[0] = inst->src[1]; 4555 } 4556 } 4557 4558 free(tempWrites); 4559} 4560 4561/* Replaces all references to a temporary register index with another index. 
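 * For example (illustrative), a single rename pair
 * { old_reg = 5, new_reg = 2 } rewrites every src, dst and
 * texture-offset reference to TEMP[5] below as TEMP[2].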
*/ 4562void 4563glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames) 4564{ 4565 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4566 unsigned j; 4567 int k; 4568 for (j = 0; j < num_inst_src_regs(inst); j++) { 4569 if (inst->src[j].file == PROGRAM_TEMPORARY) 4570 for (k = 0; k < num_renames; k++) 4571 if (inst->src[j].index == renames[k].old_reg) 4572 inst->src[j].index = renames[k].new_reg; 4573 } 4574 4575 for (j = 0; j < inst->tex_offset_num_offset; j++) { 4576 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) 4577 for (k = 0; k < num_renames; k++) 4578 if (inst->tex_offsets[j].index == renames[k].old_reg) 4579 inst->tex_offsets[j].index = renames[k].new_reg; 4580 } 4581 4582 for (j = 0; j < num_inst_dst_regs(inst); j++) { 4583 if (inst->dst[j].file == PROGRAM_TEMPORARY) 4584 for (k = 0; k < num_renames; k++) 4585 if (inst->dst[j].index == renames[k].old_reg) 4586 inst->dst[j].index = renames[k].new_reg; 4587 } 4588 } 4589} 4590 4591void 4592glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) 4593{ 4594 int depth = 0; /* loop depth */ 4595 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 4596 unsigned i = 0, j; 4597 4598 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4599 for (j = 0; j < num_inst_src_regs(inst); j++) { 4600 if (inst->src[j].file == PROGRAM_TEMPORARY) { 4601 if (first_reads[inst->src[j].index] == -1) 4602 first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start; 4603 } 4604 } 4605 for (j = 0; j < inst->tex_offset_num_offset; j++) { 4606 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) { 4607 if (first_reads[inst->tex_offsets[j].index] == -1) 4608 first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start; 4609 } 4610 } 4611 if (inst->op == TGSI_OPCODE_BGNLOOP) { 4612 if(depth++ == 0) 4613 loop_start = i; 4614 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 4615 if (--depth == 0) 4616 loop_start = -1; 4617 } 4618 assert(depth >= 0); 4619 i++; 4620 } 4621} 4622 4623void 4624glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes) 4625{ 4626 int depth = 0; /* loop depth */ 4627 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 4628 unsigned i = 0, j; 4629 int k; 4630 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4631 for (j = 0; j < num_inst_src_regs(inst); j++) { 4632 if (inst->src[j].file == PROGRAM_TEMPORARY) 4633 last_reads[inst->src[j].index] = (depth == 0) ? i : -2; 4634 } 4635 for (j = 0; j < num_inst_dst_regs(inst); j++) { 4636 if (inst->dst[j].file == PROGRAM_TEMPORARY) { 4637 if (first_writes[inst->dst[j].index] == -1) 4638 first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; 4639 last_reads[inst->dst[j].index] = (depth == 0) ? i : -2; 4640 } 4641 } 4642 for (j = 0; j < inst->tex_offset_num_offset; j++) { 4643 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) 4644 last_reads[inst->tex_offsets[j].index] = (depth == 0) ? 
i : -2; 4645 } 4646 if (inst->op == TGSI_OPCODE_BGNLOOP) { 4647 if(depth++ == 0) 4648 loop_start = i; 4649 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 4650 if (--depth == 0) { 4651 loop_start = -1; 4652 for (k = 0; k < this->next_temp; k++) { 4653 if (last_reads[k] == -2) { 4654 last_reads[k] = i; 4655 } 4656 } 4657 } 4658 } 4659 assert(depth >= 0); 4660 i++; 4661 } 4662} 4663 4664void 4665glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes) 4666{ 4667 int depth = 0; /* loop depth */ 4668 int i = 0, k; 4669 unsigned j; 4670 4671 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4672 for (j = 0; j < num_inst_dst_regs(inst); j++) { 4673 if (inst->dst[j].file == PROGRAM_TEMPORARY) 4674 last_writes[inst->dst[j].index] = (depth == 0) ? i : -2; 4675 } 4676 4677 if (inst->op == TGSI_OPCODE_BGNLOOP) 4678 depth++; 4679 else if (inst->op == TGSI_OPCODE_ENDLOOP) 4680 if (--depth == 0) { 4681 for (k = 0; k < this->next_temp; k++) { 4682 if (last_writes[k] == -2) { 4683 last_writes[k] = i; 4684 } 4685 } 4686 } 4687 assert(depth >= 0); 4688 i++; 4689 } 4690} 4691 4692/* 4693 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 4694 * channels for copy propagation and updates following instructions to 4695 * use the original versions. 4696 * 4697 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 4698 * will occur. As an example, a TXP production before this pass: 4699 * 4700 * 0: MOV TEMP[1], INPUT[4].xyyy; 4701 * 1: MOV TEMP[1].w, INPUT[4].wwww; 4702 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 4703 * 4704 * and after: 4705 * 4706 * 0: MOV TEMP[1], INPUT[4].xyyy; 4707 * 1: MOV TEMP[1].w, INPUT[4].wwww; 4708 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 4709 * 4710 * which allows for dead code elimination on TEMP[1]'s writes. 4711 */ 4712void 4713glsl_to_tgsi_visitor::copy_propagate(void) 4714{ 4715 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 4716 glsl_to_tgsi_instruction *, 4717 this->next_temp * 4); 4718 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 4719 int level = 0; 4720 4721 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4722 assert(inst->dst[0].file != PROGRAM_TEMPORARY 4723 || inst->dst[0].index < this->next_temp); 4724 4725 /* First, do any copy propagation possible into the src regs. */ 4726 for (int r = 0; r < 3; r++) { 4727 glsl_to_tgsi_instruction *first = NULL; 4728 bool good = true; 4729 int acp_base = inst->src[r].index * 4; 4730 4731 if (inst->src[r].file != PROGRAM_TEMPORARY || 4732 inst->src[r].reladdr || 4733 inst->src[r].reladdr2) 4734 continue; 4735 4736 /* See if we can find entries in the ACP consisting of MOVs 4737 * from the same src register for all the swizzled channels 4738 * of this src register reference. 
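       * E.g. (illustrative) if the ACP holds MOV TEMP[1].x, INPUT[4].x
       * and MOV TEMP[1].y, INPUT[4].y, a read of TEMP[1].xy can be
       * redirected to INPUT[4].xy, provided all referenced channels
       * copy from the same source register (checked below).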
4739 */ 4740 for (int i = 0; i < 4; i++) { 4741 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 4742 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 4743 4744 if (!copy_chan) { 4745 good = false; 4746 break; 4747 } 4748 4749 assert(acp_level[acp_base + src_chan] <= level); 4750 4751 if (!first) { 4752 first = copy_chan; 4753 } else { 4754 if (first->src[0].file != copy_chan->src[0].file || 4755 first->src[0].index != copy_chan->src[0].index || 4756 first->src[0].double_reg2 != copy_chan->src[0].double_reg2 || 4757 first->src[0].index2D != copy_chan->src[0].index2D) { 4758 good = false; 4759 break; 4760 } 4761 } 4762 } 4763 4764 if (good) { 4765 /* We've now validated that we can copy-propagate to 4766 * replace this src register reference. Do it. 4767 */ 4768 inst->src[r].file = first->src[0].file; 4769 inst->src[r].index = first->src[0].index; 4770 inst->src[r].index2D = first->src[0].index2D; 4771 inst->src[r].has_index2 = first->src[0].has_index2; 4772 inst->src[r].double_reg2 = first->src[0].double_reg2; 4773 inst->src[r].array_id = first->src[0].array_id; 4774 4775 int swizzle = 0; 4776 for (int i = 0; i < 4; i++) { 4777 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 4778 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 4779 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i)); 4780 } 4781 inst->src[r].swizzle = swizzle; 4782 } 4783 } 4784 4785 switch (inst->op) { 4786 case TGSI_OPCODE_BGNLOOP: 4787 case TGSI_OPCODE_ENDLOOP: 4788 /* End of a basic block, clear the ACP entirely. */ 4789 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 4790 break; 4791 4792 case TGSI_OPCODE_IF: 4793 case TGSI_OPCODE_UIF: 4794 ++level; 4795 break; 4796 4797 case TGSI_OPCODE_ENDIF: 4798 case TGSI_OPCODE_ELSE: 4799 /* Clear all channels written inside the block from the ACP, but 4800 * leaving those that were not touched. 4801 */ 4802 for (int r = 0; r < this->next_temp; r++) { 4803 for (int c = 0; c < 4; c++) { 4804 if (!acp[4 * r + c]) 4805 continue; 4806 4807 if (acp_level[4 * r + c] >= level) 4808 acp[4 * r + c] = NULL; 4809 } 4810 } 4811 if (inst->op == TGSI_OPCODE_ENDIF) 4812 --level; 4813 break; 4814 4815 default: 4816 /* Continuing the block, clear any written channels from 4817 * the ACP. 4818 */ 4819 for (int d = 0; d < 2; d++) { 4820 if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) { 4821 /* Any temporary might be written, so no copy propagation 4822 * across this instruction. 4823 */ 4824 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 4825 } else if (inst->dst[d].file == PROGRAM_OUTPUT && 4826 inst->dst[d].reladdr) { 4827 /* Any output might be written, so no copy propagation 4828 * from outputs across this instruction. 4829 */ 4830 for (int r = 0; r < this->next_temp; r++) { 4831 for (int c = 0; c < 4; c++) { 4832 if (!acp[4 * r + c]) 4833 continue; 4834 4835 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 4836 acp[4 * r + c] = NULL; 4837 } 4838 } 4839 } else if (inst->dst[d].file == PROGRAM_TEMPORARY || 4840 inst->dst[d].file == PROGRAM_OUTPUT) { 4841 /* Clear where it's used as dst. */ 4842 if (inst->dst[d].file == PROGRAM_TEMPORARY) { 4843 for (int c = 0; c < 4; c++) { 4844 if (inst->dst[d].writemask & (1 << c)) 4845 acp[4 * inst->dst[d].index + c] = NULL; 4846 } 4847 } 4848 4849 /* Clear where it's used as src. 
*/ 4850 for (int r = 0; r < this->next_temp; r++) { 4851 for (int c = 0; c < 4; c++) { 4852 if (!acp[4 * r + c]) 4853 continue; 4854 4855 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 4856 4857 if (acp[4 * r + c]->src[0].file == inst->dst[d].file && 4858 acp[4 * r + c]->src[0].index == inst->dst[d].index && 4859 inst->dst[d].writemask & (1 << src_chan)) { 4860 acp[4 * r + c] = NULL; 4861 } 4862 } 4863 } 4864 } 4865 } 4866 break; 4867 } 4868 4869 /* If this is a copy, add it to the ACP. */ 4870 if (inst->op == TGSI_OPCODE_MOV && 4871 inst->dst[0].file == PROGRAM_TEMPORARY && 4872 !(inst->dst[0].file == inst->src[0].file && 4873 inst->dst[0].index == inst->src[0].index) && 4874 !inst->dst[0].reladdr && 4875 !inst->dst[0].reladdr2 && 4876 !inst->saturate && 4877 inst->src[0].file != PROGRAM_ARRAY && 4878 !inst->src[0].reladdr && 4879 !inst->src[0].reladdr2 && 4880 !inst->src[0].negate && 4881 !inst->src[0].abs) { 4882 for (int i = 0; i < 4; i++) { 4883 if (inst->dst[0].writemask & (1 << i)) { 4884 acp[4 * inst->dst[0].index + i] = inst; 4885 acp_level[4 * inst->dst[0].index + i] = level; 4886 } 4887 } 4888 } 4889 } 4890 4891 ralloc_free(acp_level); 4892 ralloc_free(acp); 4893} 4894 4895/* 4896 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 4897 * code elimination. 4898 * 4899 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 4900 * will occur. As an example, a TXP production after copy propagation but 4901 * before this pass: 4902 * 4903 * 0: MOV TEMP[1], INPUT[4].xyyy; 4904 * 1: MOV TEMP[1].w, INPUT[4].wwww; 4905 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 4906 * 4907 * and after this pass: 4908 * 4909 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 4910 */ 4911int 4912glsl_to_tgsi_visitor::eliminate_dead_code(void) 4913{ 4914 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, 4915 glsl_to_tgsi_instruction *, 4916 this->next_temp * 4); 4917 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 4918 int level = 0; 4919 int removed = 0; 4920 4921 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4922 assert(inst->dst[0].file != PROGRAM_TEMPORARY 4923 || inst->dst[0].index < this->next_temp); 4924 4925 switch (inst->op) { 4926 case TGSI_OPCODE_BGNLOOP: 4927 case TGSI_OPCODE_ENDLOOP: 4928 case TGSI_OPCODE_CONT: 4929 case TGSI_OPCODE_BRK: 4930 /* End of a basic block, clear the write array entirely. 4931 * 4932 * This keeps us from killing dead code when the writes are 4933 * on either side of a loop, even when the register isn't touched 4934 * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit 4935 * dead code of this type, so it shouldn't make a difference as long as 4936 * the dead code elimination pass in the GLSL compiler does its job. 4937 */ 4938 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 4939 break; 4940 4941 case TGSI_OPCODE_ENDIF: 4942 case TGSI_OPCODE_ELSE: 4943 /* Promote the recorded level of all channels written inside the 4944 * preceding if or else block to the level above the if/else block. 
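       * E.g. (illustrative) a write recorded at level 2 inside the
       * if-block is afterwards treated as a level-1 write, since at the
       * join point it may or may not have executed.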
4945 */ 4946 for (int r = 0; r < this->next_temp; r++) { 4947 for (int c = 0; c < 4; c++) { 4948 if (!writes[4 * r + c]) 4949 continue; 4950 4951 if (write_level[4 * r + c] == level) 4952 write_level[4 * r + c] = level-1; 4953 } 4954 } 4955 if(inst->op == TGSI_OPCODE_ENDIF) 4956 --level; 4957 break; 4958 4959 case TGSI_OPCODE_IF: 4960 case TGSI_OPCODE_UIF: 4961 ++level; 4962 /* fallthrough to default case to mark the condition as read */ 4963 default: 4964 /* Continuing the block, clear any channels from the write array that 4965 * are read by this instruction. 4966 */ 4967 for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) { 4968 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ 4969 /* Any temporary might be read, so no dead code elimination 4970 * across this instruction. 4971 */ 4972 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 4973 } else if (inst->src[i].file == PROGRAM_TEMPORARY) { 4974 /* Clear where it's used as src. */ 4975 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); 4976 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); 4977 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); 4978 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); 4979 4980 for (int c = 0; c < 4; c++) { 4981 if (src_chans & (1 << c)) 4982 writes[4 * inst->src[i].index + c] = NULL; 4983 } 4984 } 4985 } 4986 for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) { 4987 if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){ 4988 /* Any temporary might be read, so no dead code elimination 4989 * across this instruction. 4990 */ 4991 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 4992 } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) { 4993 /* Clear where it's used as src. */ 4994 int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0); 4995 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1); 4996 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2); 4997 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3); 4998 4999 for (int c = 0; c < 4; c++) { 5000 if (src_chans & (1 << c)) 5001 writes[4 * inst->tex_offsets[i].index + c] = NULL; 5002 } 5003 } 5004 } 5005 break; 5006 } 5007 5008 /* If this instruction writes to a temporary, add it to the write array. 5009 * If there is already an instruction in the write array for one or more 5010 * of the channels, flag that channel write as dead. 5011 */ 5012 for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) { 5013 if (inst->dst[i].file == PROGRAM_TEMPORARY && 5014 !inst->dst[i].reladdr) { 5015 for (int c = 0; c < 4; c++) { 5016 if (inst->dst[i].writemask & (1 << c)) { 5017 if (writes[4 * inst->dst[i].index + c]) { 5018 if (write_level[4 * inst->dst[i].index + c] < level) 5019 continue; 5020 else 5021 writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c); 5022 } 5023 writes[4 * inst->dst[i].index + c] = inst; 5024 write_level[4 * inst->dst[i].index + c] = level; 5025 } 5026 } 5027 } 5028 } 5029 } 5030 5031 /* Anything still in the write array at this point is dead code. */ 5032 for (int r = 0; r < this->next_temp; r++) { 5033 for (int c = 0; c < 4; c++) { 5034 glsl_to_tgsi_instruction *inst = writes[4 * r + c]; 5035 if (inst) 5036 inst->dead_mask |= (1 << c); 5037 } 5038 } 5039 5040 /* Now actually remove the instructions that are completely dead and update 5041 * the writemask of other instructions with dead channels. 
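    * E.g. an instruction whose writemask is entirely covered by its
    * dead_mask is deleted outright; a partially dead one just has the
    * dead channels stripped from its writemask (64-bit destinations are
    * only stripped in .xy/.zw pairs).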
5042 */ 5043 foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) { 5044 if (!inst->dead_mask || !inst->dst[0].writemask) 5045 continue; 5046 /* No amount of dead masks should remove memory stores */ 5047 if (inst->info->is_store) 5048 continue; 5049 5050 if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) { 5051 inst->remove(); 5052 delete inst; 5053 removed++; 5054 } else { 5055 if (glsl_base_type_is_64bit(inst->dst[0].type)) { 5056 if (inst->dead_mask == WRITEMASK_XY || 5057 inst->dead_mask == WRITEMASK_ZW) 5058 inst->dst[0].writemask &= ~(inst->dead_mask); 5059 } else 5060 inst->dst[0].writemask &= ~(inst->dead_mask); 5061 } 5062 } 5063 5064 ralloc_free(write_level); 5065 ralloc_free(writes); 5066 5067 return removed; 5068} 5069 5070/* merge DFRACEXP instructions into one. */ 5071void 5072glsl_to_tgsi_visitor::merge_two_dsts(void) 5073{ 5074 foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) { 5075 glsl_to_tgsi_instruction *inst2; 5076 bool merged; 5077 if (num_inst_dst_regs(inst) != 2) 5078 continue; 5079 5080 if (inst->dst[0].file != PROGRAM_UNDEFINED && 5081 inst->dst[1].file != PROGRAM_UNDEFINED) 5082 continue; 5083 5084 inst2 = (glsl_to_tgsi_instruction *) inst->next; 5085 do { 5086 5087 if (inst->src[0].file == inst2->src[0].file && 5088 inst->src[0].index == inst2->src[0].index && 5089 inst->src[0].type == inst2->src[0].type && 5090 inst->src[0].swizzle == inst2->src[0].swizzle) 5091 break; 5092 inst2 = (glsl_to_tgsi_instruction *) inst2->next; 5093 } while (inst2); 5094 5095 if (!inst2) 5096 continue; 5097 merged = false; 5098 if (inst->dst[0].file == PROGRAM_UNDEFINED) { 5099 merged = true; 5100 inst->dst[0] = inst2->dst[0]; 5101 } else if (inst->dst[1].file == PROGRAM_UNDEFINED) { 5102 inst->dst[1] = inst2->dst[1]; 5103 merged = true; 5104 } 5105 5106 if (merged) { 5107 inst2->remove(); 5108 delete inst2; 5109 } 5110 } 5111} 5112 5113/* Merges temporary registers together where possible to reduce the number of 5114 * registers needed to run a program. 5115 * 5116 * Produces optimal code only after copy propagation and dead code elimination 5117 * have been run. */ 5118void 5119glsl_to_tgsi_visitor::merge_registers(void) 5120{ 5121 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); 5122 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); 5123 struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); 5124 int i, j; 5125 int num_renames = 0; 5126 5127 /* Read the indices of the last read and first write to each temp register 5128 * into an array so that we don't have to traverse the instruction list as 5129 * much. */ 5130 for (i = 0; i < this->next_temp; i++) { 5131 last_reads[i] = -1; 5132 first_writes[i] = -1; 5133 } 5134 get_last_temp_read_first_temp_write(last_reads, first_writes); 5135 5136 /* Start looking for registers with non-overlapping usages that can be 5137 * merged together. */ 5138 for (i = 0; i < this->next_temp; i++) { 5139 /* Don't touch unused registers. */ 5140 if (last_reads[i] < 0 || first_writes[i] < 0) continue; 5141 5142 for (j = 0; j < this->next_temp; j++) { 5143 /* Don't touch unused registers. */ 5144 if (last_reads[j] < 0 || first_writes[j] < 0) continue; 5145 5146 /* We can merge the two registers if the first write to j is after or 5147 * in the same instruction as the last read from i. Note that the 5148 * register at index i will always be used earlier or at the same time 5149 * as the register at index j. 
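       * E.g. (illustrative) if TEMP[i] is last read at instruction 10
       * and TEMP[j] is first written at instruction 12, the live ranges
       * do not overlap and j can safely be renamed to i.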
*/ 5150 if (first_writes[i] <= first_writes[j] && 5151 last_reads[i] <= first_writes[j]) { 5152 renames[num_renames].old_reg = j; 5153 renames[num_renames].new_reg = i; 5154 num_renames++; 5155 5156 /* Update the first_writes and last_reads arrays with the new 5157 * values for the merged register index, and mark the newly unused 5158 * register index as such. */ 5159 assert(last_reads[j] >= last_reads[i]); 5160 last_reads[i] = last_reads[j]; 5161 first_writes[j] = -1; 5162 last_reads[j] = -1; 5163 } 5164 } 5165 } 5166 5167 rename_temp_registers(num_renames, renames); 5168 ralloc_free(renames); 5169 ralloc_free(last_reads); 5170 ralloc_free(first_writes); 5171} 5172 5173/* Reassign indices to temporary registers by reusing unused indices created 5174 * by optimization passes. */ 5175void 5176glsl_to_tgsi_visitor::renumber_registers(void) 5177{ 5178 int i = 0; 5179 int new_index = 0; 5180 int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp); 5181 struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); 5182 int num_renames = 0; 5183 for (i = 0; i < this->next_temp; i++) { 5184 first_reads[i] = -1; 5185 } 5186 get_first_temp_read(first_reads); 5187 5188 for (i = 0; i < this->next_temp; i++) { 5189 if (first_reads[i] < 0) continue; 5190 if (i != new_index) { 5191 renames[num_renames].old_reg = i; 5192 renames[num_renames].new_reg = new_index; 5193 num_renames++; 5194 } 5195 new_index++; 5196 } 5197 5198 rename_temp_registers(num_renames, renames); 5199 this->next_temp = new_index; 5200 ralloc_free(renames); 5201 ralloc_free(first_reads); 5202} 5203 5204/* ------------------------- TGSI conversion stuff -------------------------- */ 5205 5206/** 5207 * Intermediate state used during shader translation. 
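 * (It bundles the ureg_program handle with the temporary, constant,
 * immediate, input/output and sampler register maps that translation
 * fills in on demand; see the fields below.)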
5208 */ 5209struct st_translate { 5210 struct ureg_program *ureg; 5211 5212 unsigned temps_size; 5213 struct ureg_dst *temps; 5214 5215 struct ureg_dst *arrays; 5216 unsigned num_temp_arrays; 5217 struct ureg_src *constants; 5218 int num_constants; 5219 struct ureg_src *immediates; 5220 int num_immediates; 5221 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 5222 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 5223 struct ureg_dst address[3]; 5224 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 5225 struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; 5226 struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; 5227 struct ureg_src systemValues[SYSTEM_VALUE_MAX]; 5228 struct ureg_src shared_memory; 5229 unsigned *array_sizes; 5230 struct inout_decl *input_decls; 5231 unsigned num_input_decls; 5232 struct inout_decl *output_decls; 5233 unsigned num_output_decls; 5234 5235 const GLuint *inputMapping; 5236 const GLuint *outputMapping; 5237 5238 unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ 5239}; 5240 5241/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ 5242unsigned 5243_mesa_sysval_to_semantic(unsigned sysval) 5244{ 5245 switch (sysval) { 5246 /* Vertex shader */ 5247 case SYSTEM_VALUE_VERTEX_ID: 5248 return TGSI_SEMANTIC_VERTEXID; 5249 case SYSTEM_VALUE_INSTANCE_ID: 5250 return TGSI_SEMANTIC_INSTANCEID; 5251 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: 5252 return TGSI_SEMANTIC_VERTEXID_NOBASE; 5253 case SYSTEM_VALUE_BASE_VERTEX: 5254 return TGSI_SEMANTIC_BASEVERTEX; 5255 case SYSTEM_VALUE_BASE_INSTANCE: 5256 return TGSI_SEMANTIC_BASEINSTANCE; 5257 case SYSTEM_VALUE_DRAW_ID: 5258 return TGSI_SEMANTIC_DRAWID; 5259 5260 /* Geometry shader */ 5261 case SYSTEM_VALUE_INVOCATION_ID: 5262 return TGSI_SEMANTIC_INVOCATIONID; 5263 5264 /* Fragment shader */ 5265 case SYSTEM_VALUE_FRAG_COORD: 5266 return TGSI_SEMANTIC_POSITION; 5267 case SYSTEM_VALUE_FRONT_FACE: 5268 return TGSI_SEMANTIC_FACE; 5269 case SYSTEM_VALUE_SAMPLE_ID: 5270 return TGSI_SEMANTIC_SAMPLEID; 5271 case SYSTEM_VALUE_SAMPLE_POS: 5272 return TGSI_SEMANTIC_SAMPLEPOS; 5273 case SYSTEM_VALUE_SAMPLE_MASK_IN: 5274 return TGSI_SEMANTIC_SAMPLEMASK; 5275 case SYSTEM_VALUE_HELPER_INVOCATION: 5276 return TGSI_SEMANTIC_HELPER_INVOCATION; 5277 5278 /* Tessellation shader */ 5279 case SYSTEM_VALUE_TESS_COORD: 5280 return TGSI_SEMANTIC_TESSCOORD; 5281 case SYSTEM_VALUE_VERTICES_IN: 5282 return TGSI_SEMANTIC_VERTICESIN; 5283 case SYSTEM_VALUE_PRIMITIVE_ID: 5284 return TGSI_SEMANTIC_PRIMID; 5285 case SYSTEM_VALUE_TESS_LEVEL_OUTER: 5286 return TGSI_SEMANTIC_TESSOUTER; 5287 case SYSTEM_VALUE_TESS_LEVEL_INNER: 5288 return TGSI_SEMANTIC_TESSINNER; 5289 5290 /* Compute shader */ 5291 case SYSTEM_VALUE_LOCAL_INVOCATION_ID: 5292 return TGSI_SEMANTIC_THREAD_ID; 5293 case SYSTEM_VALUE_WORK_GROUP_ID: 5294 return TGSI_SEMANTIC_BLOCK_ID; 5295 case SYSTEM_VALUE_NUM_WORK_GROUPS: 5296 return TGSI_SEMANTIC_GRID_SIZE; 5297 case SYSTEM_VALUE_LOCAL_GROUP_SIZE: 5298 return TGSI_SEMANTIC_BLOCK_SIZE; 5299 5300 /* Unhandled */ 5301 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: 5302 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: 5303 case SYSTEM_VALUE_VERTEX_CNT: 5304 default: 5305 assert(!"Unexpected SYSTEM_VALUE_ enum"); 5306 return TGSI_SEMANTIC_COUNT; 5307 } 5308} 5309 5310/** 5311 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 
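 * For example, a vec4 float constant (size == 4) is declared through
 * the GL_FLOAT case below via ureg_DECL_immediate().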
5312 */ 5313static struct ureg_src 5314emit_immediate(struct st_translate *t, 5315 gl_constant_value values[4], 5316 int type, int size) 5317{ 5318 struct ureg_program *ureg = t->ureg; 5319 5320 switch(type) 5321 { 5322 case GL_FLOAT: 5323 return ureg_DECL_immediate(ureg, &values[0].f, size); 5324 case GL_DOUBLE: 5325 return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size); 5326 case GL_INT: 5327 return ureg_DECL_immediate_int(ureg, &values[0].i, size); 5328 case GL_UNSIGNED_INT: 5329 case GL_BOOL: 5330 return ureg_DECL_immediate_uint(ureg, &values[0].u, size); 5331 default: 5332 assert(!"should not get here - type must be float, int, uint, or bool"); 5333 return ureg_src_undef(); 5334 } 5335} 5336 5337/** 5338 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. 5339 */ 5340static struct ureg_dst 5341dst_register(struct st_translate *t, gl_register_file file, unsigned index, 5342 unsigned array_id) 5343{ 5344 unsigned array; 5345 5346 switch(file) { 5347 case PROGRAM_UNDEFINED: 5348 return ureg_dst_undef(); 5349 5350 case PROGRAM_TEMPORARY: 5351 /* Allocate space for temporaries on demand. */ 5352 if (index >= t->temps_size) { 5353 const int inc = align(index - t->temps_size + 1, 4096); 5354 5355 t->temps = (struct ureg_dst*) 5356 realloc(t->temps, 5357 (t->temps_size + inc) * sizeof(struct ureg_dst)); 5358 if (!t->temps) 5359 return ureg_dst_undef(); 5360 5361 memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst)); 5362 t->temps_size += inc; 5363 } 5364 5365 if (ureg_dst_is_undef(t->temps[index])) 5366 t->temps[index] = ureg_DECL_local_temporary(t->ureg); 5367 5368 return t->temps[index]; 5369 5370 case PROGRAM_ARRAY: 5371 assert(array_id && array_id <= t->num_temp_arrays); 5372 array = array_id - 1; 5373 5374 if (ureg_dst_is_undef(t->arrays[array])) 5375 t->arrays[array] = ureg_DECL_array_temporary( 5376 t->ureg, t->array_sizes[array], TRUE); 5377 5378 return ureg_dst_array_offset(t->arrays[array], index); 5379 5380 case PROGRAM_OUTPUT: 5381 if (!array_id) { 5382 if (t->procType == PIPE_SHADER_FRAGMENT) 5383 assert(index < 2 * FRAG_RESULT_MAX); 5384 else if (t->procType == PIPE_SHADER_TESS_CTRL || 5385 t->procType == PIPE_SHADER_TESS_EVAL) 5386 assert(index < VARYING_SLOT_TESS_MAX); 5387 else 5388 assert(index < VARYING_SLOT_MAX); 5389 5390 assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); 5391 assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL); 5392 return t->outputs[t->outputMapping[index]]; 5393 } 5394 else { 5395 struct inout_decl *decl = find_inout_array(t->output_decls, t->num_output_decls, array_id); 5396 unsigned mesa_index = decl->mesa_index; 5397 int slot = t->outputMapping[mesa_index]; 5398 5399 assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT); 5400 5401 struct ureg_dst dst = t->outputs[slot]; 5402 dst.ArrayID = array_id; 5403 return ureg_dst_array_offset(dst, index - mesa_index); 5404 } 5405 5406 case PROGRAM_ADDRESS: 5407 return t->address[index]; 5408 5409 default: 5410 assert(!"unknown dst register file"); 5411 return ureg_dst_undef(); 5412 } 5413} 5414 5415/** 5416 * Map a glsl_to_tgsi src register to a TGSI ureg_src register. 5417 */ 5418static struct ureg_src 5419src_register(struct st_translate *t, const st_src_reg *reg) 5420{ 5421 int index = reg->index; 5422 int double_reg2 = reg->double_reg2 ? 

/**
 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
 */
static struct ureg_src
src_register(struct st_translate *t, const st_src_reg *reg)
{
   int index = reg->index;
   int double_reg2 = reg->double_reg2 ? 1 : 0;

   switch(reg->file) {
   case PROGRAM_UNDEFINED:
      return ureg_imm4f(t->ureg, 0, 0, 0, 0);

   case PROGRAM_TEMPORARY:
   case PROGRAM_ARRAY:
      return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id));

   case PROGRAM_OUTPUT: {
      struct ureg_dst dst = dst_register(t, reg->file, reg->index, reg->array_id);
      assert(dst.WriteMask != 0);
      unsigned shift = ffs(dst.WriteMask) - 1;
      return ureg_swizzle(ureg_src(dst),
                          shift,
                          MIN2(shift + 1, 3),
                          MIN2(shift + 2, 3),
                          MIN2(shift + 3, 3));
   }

   case PROGRAM_UNIFORM:
      assert(reg->index >= 0);
      return reg->index < t->num_constants ?
               t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT:       /* i.e., immediate */
      if (reg->has_index2)
         return ureg_src_register(TGSI_FILE_CONSTANT, reg->index);
      else
         return reg->index >= 0 && reg->index < t->num_constants ?
                  t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);

   case PROGRAM_IMMEDIATE:
      assert(reg->index >= 0 && reg->index < t->num_immediates);
      return t->immediates[reg->index];

   case PROGRAM_INPUT:
      /* GLSL inputs are 64-bit containers, so we have to
       * map back to the original index and add the offset after
       * mapping. */
      index -= double_reg2;
      if (!reg->array_id) {
         assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
         assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
         return t->inputs[t->inputMapping[index] + double_reg2];
      }
      else {
         struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls, reg->array_id);
         unsigned mesa_index = decl->mesa_index;
         int slot = t->inputMapping[mesa_index];

         assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);

         struct ureg_src src = t->inputs[slot];
         src.ArrayID = reg->array_id;
         return ureg_src_array_offset(src, index + double_reg2 - mesa_index);
      }

   case PROGRAM_ADDRESS:
      return ureg_src(t->address[reg->index]);

   case PROGRAM_SYSTEM_VALUE:
      assert(reg->index < (int) ARRAY_SIZE(t->systemValues));
      return t->systemValues[reg->index];

   default:
      assert(!"unknown src register file");
      return ureg_src_undef();
   }
}
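
/*
 * Worked example of the PROGRAM_OUTPUT swizzle above: reading back an
 * output whose WriteMask is ZW (0xC) gives shift = ffs(0xC) - 1 = 2, so
 * the source is swizzled (Z, W, W, W); reads always start at the first
 * written channel rather than at X.
 */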

/**
 * Create a TGSI ureg_dst register from an st_dst_reg.
 */
static struct ureg_dst
translate_dst(struct st_translate *t,
              const st_dst_reg *dst_reg,
              bool saturate)
{
   struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
                                      dst_reg->array_id);

   if (dst.File == TGSI_FILE_NULL)
      return dst;

   dst = ureg_writemask(dst, dst_reg->writemask);

   if (saturate)
      dst = ureg_saturate(dst);

   if (dst_reg->reladdr != NULL) {
      assert(dst_reg->file != PROGRAM_TEMPORARY);
      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
   }

   if (dst_reg->has_index2) {
      if (dst_reg->reladdr2)
         dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]),
                                           dst_reg->index2D);
      else
         dst = ureg_dst_dimension(dst, dst_reg->index2D);
   }

   return dst;
}

/**
 * Create a TGSI ureg_src register from an st_src_reg.
 */
static struct ureg_src
translate_src(struct st_translate *t, const st_src_reg *src_reg)
{
   struct ureg_src src = src_register(t, src_reg);

   if (src_reg->has_index2) {
      /* 2D indexes occur with geometry shader inputs (attrib, vertex)
       * and UBO constant buffers (buffer, position).
       */
      if (src_reg->reladdr2)
         src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]),
                                           src_reg->index2D);
      else
         src = ureg_src_dimension(src, src_reg->index2D);
   }

   src = ureg_swizzle(src,
                      GET_SWZ(src_reg->swizzle, 0) & 0x3,
                      GET_SWZ(src_reg->swizzle, 1) & 0x3,
                      GET_SWZ(src_reg->swizzle, 2) & 0x3,
                      GET_SWZ(src_reg->swizzle, 3) & 0x3);

   if (src_reg->abs)
      src = ureg_abs(src);

   if ((src_reg->negate & 0xf) == NEGATE_XYZW)
      src = ureg_negate(src);

   if (src_reg->reladdr != NULL) {
      assert(src_reg->file != PROGRAM_TEMPORARY);
      src = ureg_src_indirect(src, ureg_src(t->address[0]));
   }

   return src;
}

static struct tgsi_texture_offset
translate_tex_offset(struct st_translate *t,
                     const st_src_reg *in_offset)
{
   struct tgsi_texture_offset offset;
   struct ureg_src src = translate_src(t, in_offset);

   offset.File = src.File;
   offset.Index = src.Index;
   offset.SwizzleX = src.SwizzleX;
   offset.SwizzleY = src.SwizzleY;
   offset.SwizzleZ = src.SwizzleZ;
   offset.Padding = 0;

   assert(!src.Indirect);
   assert(!src.DimIndirect);
   assert(!src.Dimension);
   assert(!src.Absolute); /* those shouldn't be used with integers anyway */
   assert(!src.Negate);

   return offset;
}
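
/*
 * Worked example for translate_src(): an st_src_reg with swizzle .wzyx,
 * abs set, and negate == NEGATE_XYZW becomes
 * ureg_negate(ureg_abs(ureg_swizzle(src, 3, 2, 1, 0))), i.e. -|src.wzyx|.
 * Note that only a full four-channel negate mask is honored here; the
 * translation appears to rely on earlier stages never emitting
 * per-channel negates.
 */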

static void
compile_tgsi_instruction(struct st_translate *t,
                         const glsl_to_tgsi_instruction *inst)
{
   struct ureg_program *ureg = t->ureg;
   int i;
   struct ureg_dst dst[2];
   struct ureg_src src[4];
   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];

   int num_dst;
   int num_src;
   unsigned tex_target = 0;

   num_dst = num_inst_dst_regs(inst);
   num_src = num_inst_src_regs(inst);

   for (i = 0; i < num_dst; i++)
      dst[i] = translate_dst(t,
                             &inst->dst[i],
                             inst->saturate);

   for (i = 0; i < num_src; i++)
      src[i] = translate_src(t, &inst->src[i]);

   switch(inst->op) {
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_IF:
   case TGSI_OPCODE_UIF:
      assert(num_dst == 0);
      ureg_insn(ureg, inst->op, NULL, 0, src, num_src);
      return;

   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_TXQS:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_TG4:
   case TGSI_OPCODE_LODQ:
      src[num_src] = t->samplers[inst->resource.index];
      assert(src[num_src].File != TGSI_FILE_NULL);
      if (inst->resource.reladdr)
         src[num_src] =
            ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
      num_src++;
      for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
      }
      tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);

      ureg_tex_insn(ureg,
                    inst->op,
                    dst, num_dst,
                    tex_target,
                    texoffsets, inst->tex_offset_num_offset,
                    src, num_src);
      return;

   case TGSI_OPCODE_RESQ:
   case TGSI_OPCODE_LOAD:
   case TGSI_OPCODE_ATOMUADD:
   case TGSI_OPCODE_ATOMXCHG:
   case TGSI_OPCODE_ATOMCAS:
   case TGSI_OPCODE_ATOMAND:
   case TGSI_OPCODE_ATOMOR:
   case TGSI_OPCODE_ATOMXOR:
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMUMAX:
   case TGSI_OPCODE_ATOMIMIN:
   case TGSI_OPCODE_ATOMIMAX:
      for (i = num_src - 1; i >= 0; i--)
         src[i + 1] = src[i];
      num_src++;
      if (inst->resource.file == PROGRAM_MEMORY) {
         src[0] = t->shared_memory;
      } else if (inst->resource.file == PROGRAM_BUFFER) {
         src[0] = t->buffers[inst->resource.index];
      } else {
         src[0] = t->images[inst->resource.index];
         tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
      }
      if (inst->resource.reladdr)
         src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
      assert(src[0].File != TGSI_FILE_NULL);
      ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
                       inst->buffer_access,
                       tex_target, inst->image_format);
      break;

   case TGSI_OPCODE_STORE:
      if (inst->resource.file == PROGRAM_MEMORY) {
         dst[0] = ureg_dst(t->shared_memory);
      } else if (inst->resource.file == PROGRAM_BUFFER) {
         dst[0] = ureg_dst(t->buffers[inst->resource.index]);
      } else {
         dst[0] = ureg_dst(t->images[inst->resource.index]);
         tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
      }
      dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
      if (inst->resource.reladdr)
         dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
      assert(dst[0].File != TGSI_FILE_NULL);
      ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
                       inst->buffer_access,
                       tex_target, inst->image_format);
      break;

   case TGSI_OPCODE_SCS:
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
      break;

   default:
      ureg_insn(ureg,
                inst->op,
                dst, num_dst,
                src, num_src);
      break;
   }
}
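
/*
 * Source layout example for the resource cases above: a LOAD that came in
 * as "LOAD dst, offset" has its sources shifted right by one so that the
 * resource (buffer, image, or shared memory) becomes src[0], yielding the
 * TGSI form "LOAD dst, resource, offset"; texture opcodes instead append
 * the sampler as the trailing source.
 */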

/**
 * Emit the TGSI instructions for inverting and adjusting WPOS.
 * This code is unavoidable because it also depends on whether
 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
 */
static void
emit_wpos_adjustment(struct gl_context *ctx,
                     struct st_translate *t,
                     int wpos_transform_const,
                     boolean invert,
                     GLfloat adjX, GLfloat adjY[2])
{
   struct ureg_program *ureg = t->ureg;

   assert(wpos_transform_const >= 0);

   /* Fragment program uses fragment position input.
    * Need to replace instances of INPUT[WPOS] with temp T
    * where T = INPUT[WPOS] is inverted by Y.
    */
   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
   struct ureg_src *wpos =
      ctx->Const.GLSLFragCoordIsSysVal ?
         &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
         &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
   struct ureg_src wpos_input = *wpos;

   /* First, apply the coordinate shift: */
   if (adjX || adjY[0] || adjY[1]) {
      if (adjY[0] != adjY[1]) {
         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
          * depending on whether inversion is actually going to be applied
          * or not, which is determined by testing against the inversion
          * state variable used below, which will be either +1 or -1.
          */
         struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);

         ureg_CMP(ureg, adj_temp,
                  ureg_scalar(wpostrans, invert ? 2 : 0),
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
      } else {
         ureg_ADD(ureg, wpos_temp, wpos_input,
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
      }
      wpos_input = ureg_src(wpos_temp);
   } else {
      /* MOV wpos_temp, input[wpos]
       */
      ureg_MOV(ureg, wpos_temp, wpos_input);
   }

   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
    * inversion/identity, or the other way around if we're drawing to an FBO.
    */
   if (invert) {
      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
       */
      ureg_MAD(ureg,
               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
               wpos_input,
               ureg_scalar(wpostrans, 0),
               ureg_scalar(wpostrans, 1));
   } else {
      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
       */
      ureg_MAD(ureg,
               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
               wpos_input,
               ureg_scalar(wpostrans, 2),
               ureg_scalar(wpostrans, 3));
   }

   /* Use wpos_temp as position input from here on:
    */
   *wpos = ureg_src(wpos_temp);
}
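
/*
 * Worked example (assuming the usual packing of the state constant as
 * (scale, bias) pairs): drawing to a window-system framebuffer of height
 * h with wpostrans = (-1, h, 1, 0), the invert branch computes
 * y' = y * wpostrans.x + wpostrans.y = h - y, while the identity branch
 * (.zw) leaves y' = y * 1 + 0 = y.
 */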

/**
 * Emit fragment position/coordinate code.
 */
static void
emit_wpos(struct st_context *st,
          struct st_translate *t,
          const struct gl_program *program,
          struct ureg_program *ureg,
          int wpos_transform_const)
{
   struct pipe_screen *pscreen = st->pipe->screen;
   GLfloat adjX = 0.0f;
   GLfloat adjY[2] = { 0.0f, 0.0f };
   boolean invert = FALSE;

   /* Query the pixel center conventions supported by the pipe driver and set
    * adjX, adjY to help out if it cannot handle the requested one internally.
    *
    * The bias of the y-coordinate depends on whether y-inversion takes place
    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
    * drawing to an FBO (causes additional inversion), and whether the pipe
    * driver origin and the requested origin differ (the latter condition is
    * stored in the 'invert' variable).
    *
    * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
    *
    * center shift only:
    * i -> h: +0.5
    * h -> i: -0.5
    *
    * inversion only:
    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
    *
    * inversion and center shift:
    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
    */
   if (program->OriginUpperLeft) {
      /* Fragment shader wants origin in upper-left */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
         /* the driver supports upper-left origin */
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
         /* the driver supports lower-left origin, need to invert Y */
         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
                       TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
         invert = TRUE;
      }
      else
         assert(0);
   }
   else {
      /* Fragment shader wants origin in lower-left */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
         /* the driver supports lower-left origin */
         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
                       TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
         /* the driver supports upper-left origin, need to invert Y */
         invert = TRUE;
      else
         assert(0);
   }

   if (program->PixelCenterInteger) {
      /* Fragment shader wants pixel center integer */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
         /* the driver supports pixel center integer */
         adjY[1] = 1.0f;
         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
                       TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
         /* the driver supports pixel center half integer, need to bias X,Y */
         adjX = -0.5f;
         adjY[0] = -0.5f;
         adjY[1] = 0.5f;
      }
      else
         assert(0);
   }
   else {
      /* Fragment shader wants pixel center half integer */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
         /* the driver supports pixel center half integer */
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
         /* the driver supports pixel center integer, need to bias X,Y */
         adjX = adjY[0] = adjY[1] = 0.5f;
         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
                       TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
      }
      else
         assert(0);
   }

   /* we invert after adjustment so that we avoid the MOV to temporary,
    * and reuse the adjustment ADD instead */
   emit_wpos_adjustment(st->ctx, t, wpos_transform_const, invert, adjX, adjY);
}

/**
 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
 * TGSI uses +1 for front, -1 for back.
 * This function converts the TGSI value to the GL value.  Simply clamping/
 * saturating the value to [0,1] does the job.
 */
static void
emit_face_var(struct gl_context *ctx, struct st_translate *t)
{
   struct ureg_program *ureg = t->ureg;
   struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
   struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];

   if (ctx->Const.NativeIntegers) {
      ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
   }
   else {
      /* MOV_SAT face_temp, input[face] */
      ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
   }

   /* Use face_temp as face input from here on: */
   t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
}

static void
emit_compute_block_size(const struct gl_program *prog,
                        struct ureg_program *ureg) {
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
                 prog->info.cs.local_size[0]);
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
                 prog->info.cs.local_size[1]);
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
                 prog->info.cs.local_size[2]);
}

struct sort_inout_decls {
   bool operator()(const struct inout_decl &a, const struct inout_decl &b) const {
      return mapping[a.mesa_index] < mapping[b.mesa_index];
   }

   const GLuint *mapping;
};

/* Sort the given array of decls by the corresponding slot (TGSI file index).
 *
 * This is for the benefit of older drivers which are broken when the
 * declarations aren't sorted in this way.
 */
static void
sort_inout_decls_by_slot(struct inout_decl *decls,
                         unsigned count,
                         const GLuint mapping[])
{
   sort_inout_decls sorter;
   sorter.mapping = mapping;
   std::sort(decls, decls + count, sorter);
}

static unsigned
st_translate_interp(enum glsl_interp_mode glsl_qual, GLuint varying)
{
   switch (glsl_qual) {
   case INTERP_MODE_NONE:
      if (varying == VARYING_SLOT_COL0 || varying == VARYING_SLOT_COL1)
         return TGSI_INTERPOLATE_COLOR;
      return TGSI_INTERPOLATE_PERSPECTIVE;
   case INTERP_MODE_SMOOTH:
      return TGSI_INTERPOLATE_PERSPECTIVE;
   case INTERP_MODE_FLAT:
      return TGSI_INTERPOLATE_CONSTANT;
   case INTERP_MODE_NOPERSPECTIVE:
      return TGSI_INTERPOLATE_LINEAR;
   default:
      assert(0 && "unexpected interp mode in st_translate_interp()");
      return TGSI_INTERPOLATE_PERSPECTIVE;
   }
}
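
/*
 * Example for st_translate_interp(): an unqualified color varying
 * (VARYING_SLOT_COL0/COL1) is declared as TGSI_INTERPOLATE_COLOR, which
 * lets the driver choose perspective or flat interpolation from the
 * rasterizer flat-shading state; any other unqualified input defaults to
 * perspective.
 */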

/**
 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
 * \param program  the program to translate
 * \param numInputs  number of input registers used
 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
 *                      input indexes
 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
 *                            each input
 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
 * \param numOutputs  number of output registers used
 * \param outputMapping  maps Mesa fragment program outputs to TGSI
 *                       generic outputs
 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
 *                             each output
 *
 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
 */
extern "C" enum pipe_error
st_translate_program(
   struct gl_context *ctx,
   uint procType,
   struct ureg_program *ureg,
   glsl_to_tgsi_visitor *program,
   const struct gl_program *proginfo,
   GLuint numInputs,
   const GLuint inputMapping[],
   const GLuint inputSlotToAttr[],
   const ubyte inputSemanticName[],
   const ubyte inputSemanticIndex[],
   const GLuint interpMode[],
   GLuint numOutputs,
   const GLuint outputMapping[],
   const GLuint outputSlotToAttr[],
   const ubyte outputSemanticName[],
   const ubyte outputSemanticIndex[])
{
   struct st_translate *t;
   unsigned i;
   struct gl_program_constants *frag_const =
      &ctx->Const.Program[MESA_SHADER_FRAGMENT];
   enum pipe_error ret = PIPE_OK;

   assert(numInputs <= ARRAY_SIZE(t->inputs));
   assert(numOutputs <= ARRAY_SIZE(t->outputs));

   t = CALLOC_STRUCT(st_translate);
   if (!t) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto out;
   }

   t->procType = procType;
   t->inputMapping = inputMapping;
   t->outputMapping = outputMapping;
   t->ureg = ureg;
   t->num_temp_arrays = program->next_array;
   if (t->num_temp_arrays)
      t->arrays = (struct ureg_dst*)
                  calloc(t->num_temp_arrays, sizeof(t->arrays[0]));

   /*
    * Declare input attributes.
    */
   switch (procType) {
   case PIPE_SHADER_FRAGMENT:
   case PIPE_SHADER_GEOMETRY:
   case PIPE_SHADER_TESS_EVAL:
   case PIPE_SHADER_TESS_CTRL:
      sort_inout_decls_by_slot(program->inputs, program->num_inputs, inputMapping);

      for (i = 0; i < program->num_inputs; ++i) {
         struct inout_decl *decl = &program->inputs[i];
         unsigned slot = inputMapping[decl->mesa_index];
         struct ureg_src src;
         ubyte tgsi_usage_mask = decl->usage_mask;

         if (glsl_base_type_is_64bit(decl->base_type)) {
            if (tgsi_usage_mask == 1)
               tgsi_usage_mask = TGSI_WRITEMASK_XY;
            else if (tgsi_usage_mask == 2)
               tgsi_usage_mask = TGSI_WRITEMASK_ZW;
            else
               tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
         }

         unsigned interp_mode = 0;
         unsigned interp_location = 0;
         if (procType == PIPE_SHADER_FRAGMENT) {
            assert(interpMode);
            interp_mode = interpMode[slot] != TGSI_INTERPOLATE_COUNT ?
               interpMode[slot] :
               st_translate_interp(decl->interp, inputSlotToAttr[slot]);

            interp_location = decl->interp_loc;
         }

         src = ureg_DECL_fs_input_cyl_centroid_layout(ureg,
                  inputSemanticName[slot], inputSemanticIndex[slot],
                  interp_mode, 0, interp_location, slot, tgsi_usage_mask,
                  decl->array_id, decl->size);

         for (unsigned j = 0; j < decl->size; ++j) {
            if (t->inputs[slot + j].File != TGSI_FILE_INPUT) {
               /* The ArrayID is set up in dst_register */
               t->inputs[slot + j] = src;
               t->inputs[slot + j].ArrayID = 0;
               t->inputs[slot + j].Index += j;
            }
         }
      }
      break;
   case PIPE_SHADER_VERTEX:
      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
      }
      break;
   case PIPE_SHADER_COMPUTE:
      break;
   default:
      assert(0);
   }
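
   /*
    * Note on the 64-bit handling above: double-precision values occupy two
    * 32-bit components each, so a GLSL-level usage mask of X (mask 1)
    * widens to TGSI XY and Y (mask 2) widens to ZW; any wider mask claims
    * the whole XYZW register.
    */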

   /*
    * Declare output attributes.
    */
   switch (procType) {
   case PIPE_SHADER_FRAGMENT:
   case PIPE_SHADER_COMPUTE:
      break;
   case PIPE_SHADER_GEOMETRY:
   case PIPE_SHADER_TESS_EVAL:
   case PIPE_SHADER_TESS_CTRL:
   case PIPE_SHADER_VERTEX:
      sort_inout_decls_by_slot(program->outputs, program->num_outputs, outputMapping);

      for (i = 0; i < program->num_outputs; ++i) {
         struct inout_decl *decl = &program->outputs[i];
         unsigned slot = outputMapping[decl->mesa_index];
         struct ureg_dst dst;
         ubyte tgsi_usage_mask = decl->usage_mask;

         if (glsl_base_type_is_64bit(decl->base_type)) {
            if (tgsi_usage_mask == 1)
               tgsi_usage_mask = TGSI_WRITEMASK_XY;
            else if (tgsi_usage_mask == 2)
               tgsi_usage_mask = TGSI_WRITEMASK_ZW;
            else
               tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
         }

         dst = ureg_DECL_output_layout(ureg,
                  outputSemanticName[slot], outputSemanticIndex[slot],
                  decl->gs_out_streams,
                  slot, tgsi_usage_mask, decl->array_id, decl->size);

         for (unsigned j = 0; j < decl->size; ++j) {
            if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) {
               /* The ArrayID is set up in dst_register */
               t->outputs[slot + j] = dst;
               t->outputs[slot + j].ArrayID = 0;
               t->outputs[slot + j].Index += j;
            }
         }
      }
      break;
   default:
      assert(0);
   }
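
   /*
    * Example of the slot fill-in above: an arrayed varying declared with
    * size 4 at slot 3 populates t->outputs[3..6] with the same declaration,
    * Index bumped by 0..3 and ArrayID cleared, so later per-element
    * accesses can be resolved through dst_register()/src_register().
    */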

   if (procType == PIPE_SHADER_FRAGMENT) {
      if (program->shader->info.EarlyFragmentTests)
         ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);

      if (proginfo->info.inputs_read & VARYING_BIT_POS) {
         /* Must do this after setting up t->inputs. */
         emit_wpos(st_context(ctx), t, proginfo, ureg,
                   program->wpos_transform_const);
      }

      if (proginfo->info.inputs_read & VARYING_BIT_FACE)
         emit_face_var(ctx, t);

      for (i = 0; i < numOutputs; i++) {
         switch (outputSemanticName[i]) {
         case TGSI_SEMANTIC_POSITION:
            t->outputs[i] = ureg_DECL_output(ureg,
                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
                                             outputSemanticIndex[i]);
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
            break;
         case TGSI_SEMANTIC_STENCIL:
            t->outputs[i] = ureg_DECL_output(ureg,
                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
                                             outputSemanticIndex[i]);
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
            break;
         case TGSI_SEMANTIC_COLOR:
            t->outputs[i] = ureg_DECL_output(ureg,
                                             TGSI_SEMANTIC_COLOR,
                                             outputSemanticIndex[i]);
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            t->outputs[i] = ureg_DECL_output(ureg,
                                             TGSI_SEMANTIC_SAMPLEMASK,
                                             outputSemanticIndex[i]);
            /* TODO: If we ever support more than 32 samples, this will have
             * to become an array.
             */
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
            break;
         default:
            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
            ret = PIPE_ERROR_BAD_INPUT;
            goto out;
         }
      }
   }
   else if (procType == PIPE_SHADER_VERTEX) {
      for (i = 0; i < numOutputs; i++) {
         if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
            /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
            ureg_MOV(ureg,
                     ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
                     ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
         }
      }
   }

   if (procType == PIPE_SHADER_COMPUTE) {
      emit_compute_block_size(proginfo, ureg);
   }

   /* Declare address register.
    */
   if (program->num_address_regs > 0) {
      assert(program->num_address_regs <= 3);
      for (int i = 0; i < program->num_address_regs; i++)
         t->address[i] = ureg_DECL_address(ureg);
   }

   /* Declare misc input registers
    */
   {
      GLbitfield sysInputs = proginfo->info.system_values_read;

      for (i = 0; sysInputs; i++) {
         if (sysInputs & (1 << i)) {
            unsigned semName = _mesa_sysval_to_semantic(i);

            t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);

            if (semName == TGSI_SEMANTIC_INSTANCEID ||
                semName == TGSI_SEMANTIC_VERTEXID) {
               /* From Gallium perspective, these system values are always
                * integer, and require native integer support.  However, if
                * native integer is supported on the vertex stage but not the
                * pixel stage (e.g., i915g + draw), Mesa will generate IR that
                * assumes these system values are floats.  To resolve the
                * inconsistency, we insert a U2F.
                */
               struct st_context *st = st_context(ctx);
               struct pipe_screen *pscreen = st->pipe->screen;
               assert(procType == PIPE_SHADER_VERTEX);
               assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
               (void) pscreen;
               if (!ctx->Const.NativeIntegers) {
                  struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
                  ureg_U2F(t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
                  t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
               }
            }

            if (procType == PIPE_SHADER_FRAGMENT &&
                semName == TGSI_SEMANTIC_POSITION)
               emit_wpos(st_context(ctx), t, proginfo, ureg,
                         program->wpos_transform_const);

            sysInputs &= ~(1 << i);
         }
      }
   }

   t->array_sizes = program->array_sizes;
   t->input_decls = program->inputs;
   t->num_input_decls = program->num_inputs;
   t->output_decls = program->outputs;
   t->num_output_decls = program->num_outputs;

   /* Emit constants and uniforms.  TGSI uses a single index space for these,
    * so we put all the translated regs in t->constants.
    */
   if (proginfo->Parameters) {
      t->constants = (struct ureg_src *)
         calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
      if (t->constants == NULL) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto out;
      }
      t->num_constants = proginfo->Parameters->NumParameters;

      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
         switch (proginfo->Parameters->Parameters[i].Type) {
         case PROGRAM_STATE_VAR:
         case PROGRAM_UNIFORM:
            t->constants[i] = ureg_DECL_constant(ureg, i);
            break;

         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
          * addressing of the const buffer.
          * FIXME: Be smarter and recognize param arrays:
          * indirect addressing is only valid within the referenced
          * array.
          */
         case PROGRAM_CONSTANT:
            if (program->indirect_addr_consts)
               t->constants[i] = ureg_DECL_constant(ureg, i);
            else
               t->constants[i] = emit_immediate(t,
                                                proginfo->Parameters->ParameterValues[i],
                                                proginfo->Parameters->Parameters[i].DataType,
                                                4);
            break;
         default:
            break;
         }
      }
   }

   for (i = 0; i < proginfo->info.num_ubos; i++) {
      unsigned size = proginfo->sh.UniformBlocks[i]->UniformBufferSize;
      unsigned num_const_vecs = (size + 15) / 16;
      unsigned first, last;
      assert(num_const_vecs > 0);
      first = 0;
      last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
      ureg_DECL_constant2D(t->ureg, first, last, i + 1);
   }
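
   /*
    * Worked example for the UBO loop above: a uniform block of 100 bytes
    * needs (100 + 15) / 16 = 7 vec4 constants, so constants [0..6] are
    * declared in 2D constant buffer i + 1; buffer index 0 holds the
    * regular parameters declared through t->constants above.
    */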

   /* Emit immediate values.
    */
   t->immediates = (struct ureg_src *)
      calloc(program->num_immediates, sizeof(struct ureg_src));
   if (t->immediates == NULL) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto out;
   }
   t->num_immediates = program->num_immediates;

   i = 0;
   foreach_in_list(immediate_storage, imm, &program->immediates) {
      assert(i < program->num_immediates);
      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32);
   }
   assert(i == program->num_immediates);

   /* texture samplers */
   for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
      if (program->samplers_used & (1u << i)) {
         unsigned type;

         t->samplers[i] = ureg_DECL_sampler(ureg, i);

         switch (program->sampler_types[i]) {
         case GLSL_TYPE_INT:
            type = TGSI_RETURN_TYPE_SINT;
            break;
         case GLSL_TYPE_UINT:
            type = TGSI_RETURN_TYPE_UINT;
            break;
         case GLSL_TYPE_FLOAT:
            type = TGSI_RETURN_TYPE_FLOAT;
            break;
         default:
            unreachable("not reached");
         }

         ureg_DECL_sampler_view(ureg, i, program->sampler_targets[i],
                                type, type, type, type);
      }
   }

   for (i = 0; i < frag_const->MaxAtomicBuffers; i++) {
      if (program->buffers_used & (1 << i)) {
         t->buffers[i] = ureg_DECL_buffer(ureg, i, true);
      }
   }

   for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks;
        i++) {
      if (program->buffers_used & (1 << i)) {
         t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
      }
   }

   if (program->use_shared_memory)
      t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);

   for (i = 0; i < program->shader->Program->info.num_images; i++) {
      if (program->images_used & (1 << i)) {
         t->images[i] = ureg_DECL_image(ureg, i,
                                        program->image_targets[i],
                                        program->image_formats[i],
                                        true, false);
      }
   }

   /* Emit each instruction in turn:
    */
   foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions)
      compile_tgsi_instruction(t, inst);

   /* Set the next shader stage hint for VS and TES. */
   switch (procType) {
   case PIPE_SHADER_VERTEX:
   case PIPE_SHADER_TESS_EVAL:
      if (program->shader_program->SeparateShader)
         break;

      for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) {
         if (program->shader_program->_LinkedShaders[i]) {
            unsigned next;

            switch (i) {
            case MESA_SHADER_TESS_CTRL:
               next = PIPE_SHADER_TESS_CTRL;
               break;
            case MESA_SHADER_TESS_EVAL:
               next = PIPE_SHADER_TESS_EVAL;
               break;
            case MESA_SHADER_GEOMETRY:
               next = PIPE_SHADER_GEOMETRY;
               break;
            case MESA_SHADER_FRAGMENT:
               next = PIPE_SHADER_FRAGMENT;
               break;
            default:
               assert(0);
               continue;
            }

            ureg_set_next_shader_processor(ureg, next);
            break;
         }
      }
      break;
   }

out:
   if (t) {
      free(t->arrays);
      free(t->temps);
      free(t->constants);
      t->num_constants = 0;
      free(t->immediates);
      t->num_immediates = 0;
      FREE(t);
   }

   return ret;
}
/* ----------------------------- End TGSI code ------------------------------ */


/**
 * Convert a shader's GLSL IR into a Mesa gl_program, although without
 * generating Mesa IR.
 */
static struct gl_program *
get_mesa_program_tgsi(struct gl_context *ctx,
                      struct gl_shader_program *shader_program,
                      struct gl_linked_shader *shader)
{
   glsl_to_tgsi_visitor* v;
   struct gl_program *prog;
   struct gl_shader_compiler_options *options =
         &ctx->Const.ShaderCompilerOptions[shader->Stage];
   struct pipe_screen *pscreen = ctx->st->pipe->screen;
   enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);

   validate_ir_tree(shader->ir);

   prog = shader->Program;

   prog->Parameters = _mesa_new_parameter_list();
   v = new glsl_to_tgsi_visitor();
   v->ctx = ctx;
   v->prog = prog;
   v->shader_program = shader_program;
   v->shader = shader;
   v->options = options;
   v->glsl_version = ctx->Const.GLSLVersion;
   v->native_integers = ctx->Const.NativeIntegers;

   v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
                                            PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
   v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
                                           PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);

   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
                                               prog->Parameters);

   /* Remove reads from output registers. */
   if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS))
      lower_output_reads(shader->Stage, shader->ir);

   /* Emit intermediate IR for main(). */
   visit_exec_list(shader->ir, v);

#if 0
   /* Print out some information (for debugging purposes) used by the
    * optimization passes. */
   {
      int i;
      int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
      int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
      int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
      int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);

      for (i = 0; i < v->next_temp; i++) {
         first_writes[i] = -1;
         first_reads[i] = -1;
         last_writes[i] = -1;
         last_reads[i] = -1;
      }
      v->get_first_temp_read(first_reads);
      v->get_last_temp_read_first_temp_write(last_reads, first_writes);
      v->get_last_temp_write(last_writes);
      for (i = 0; i < v->next_temp; i++)
         printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
                first_writes[i],
                last_reads[i],
                last_writes[i]);
      ralloc_free(first_writes);
      ralloc_free(first_reads);
      ralloc_free(last_writes);
      ralloc_free(last_reads);
   }
#endif

   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
   v->simplify_cmp();

   if (shader->Stage != MESA_SHADER_TESS_CTRL &&
       shader->Stage != MESA_SHADER_TESS_EVAL)
      v->copy_propagate();

   while (v->eliminate_dead_code());

   v->merge_two_dsts();
   v->merge_registers();
   v->renumber_registers();
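
   /* Dead-code elimination above runs to a fixed point; the merge and
    * renumber passes then compact the temporary register file before the
    * result is handed to st_translate_program().
    */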

   /* Write the END instruction. */
   v->emit_asm(NULL, TGSI_OPCODE_END);

   if (ctx->_Shader->Flags & GLSL_DUMP) {
      _mesa_log("\n");
      _mesa_log("GLSL IR for linked %s program %d:\n",
                _mesa_shader_stage_to_string(shader->Stage),
                shader_program->Name);
      _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
      _mesa_log("\n\n");
   }

   do_set_program_inouts(shader->ir, prog, shader->Stage);
   _mesa_copy_linked_program_data(shader_program, shader);
   shrink_array_declarations(v->inputs, v->num_inputs,
                             &prog->info.inputs_read,
                             prog->info.double_inputs_read,
                             &prog->info.patch_inputs_read);
   shrink_array_declarations(v->outputs, v->num_outputs,
                             &prog->info.outputs_written, 0ULL,
                             &prog->info.patch_outputs_written);
   count_resources(v, prog);

   /* The GLSL IR won't be needed anymore. */
   ralloc_free(shader->ir);
   shader->ir = NULL;

   /* This must be done before the uniform storage is associated. */
   if (shader->Stage == MESA_SHADER_FRAGMENT &&
       (prog->info.inputs_read & VARYING_BIT_POS ||
        prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) {
      static const gl_state_index wposTransformState[STATE_LENGTH] = {
         STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
      };

      v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters,
                                                          wposTransformState);
   }

   /* Avoid reallocation of the program parameter list, because the uniform
    * storage is only associated with the original parameter list.
    * This should be enough for Bitmap and DrawPixels constants.
    */
   _mesa_reserve_parameter_storage(prog->Parameters, 8);

   /* This has to be done last.  Any operation that can cause
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
   if (!shader_program->data->LinkStatus) {
      free_glsl_to_tgsi_visitor(v);
      _mesa_reference_program(ctx, &shader->Program, NULL);
      return NULL;
   }

   struct st_vertex_program *stvp;
   struct st_fragment_program *stfp;
   struct st_geometry_program *stgp;
   struct st_tessctrl_program *sttcp;
   struct st_tesseval_program *sttep;
   struct st_compute_program *stcp;

   switch (shader->Stage) {
   case MESA_SHADER_VERTEX:
      stvp = (struct st_vertex_program *)prog;
      stvp->glsl_to_tgsi = v;
      break;
   case MESA_SHADER_FRAGMENT:
      stfp = (struct st_fragment_program *)prog;
      stfp->glsl_to_tgsi = v;
      break;
   case MESA_SHADER_GEOMETRY:
      stgp = (struct st_geometry_program *)prog;
      stgp->glsl_to_tgsi = v;
      break;
   case MESA_SHADER_TESS_CTRL:
      sttcp = (struct st_tessctrl_program *)prog;
      sttcp->glsl_to_tgsi = v;
      break;
   case MESA_SHADER_TESS_EVAL:
      sttep = (struct st_tesseval_program *)prog;
      sttep->glsl_to_tgsi = v;
      break;
   case MESA_SHADER_COMPUTE:
      stcp = (struct st_compute_program *)prog;
      stcp->glsl_to_tgsi = v;
      break;
   default:
      assert(!"should not be reached");
      return NULL;
   }

   return prog;
}

static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}
6697 */ 6698 switch (shader->Stage) { 6699 case MESA_SHADER_VERTEX: 6700 states = &((struct st_vertex_program*)prog)->affected_states; 6701 6702 *states = ST_NEW_VS_STATE | 6703 ST_NEW_RASTERIZER | 6704 ST_NEW_VERTEX_ARRAYS; 6705 6706 set_affected_state_flags(states, prog, 6707 ST_NEW_VS_CONSTANTS, 6708 ST_NEW_VS_SAMPLER_VIEWS, 6709 ST_NEW_RENDER_SAMPLERS, 6710 ST_NEW_VS_IMAGES, 6711 ST_NEW_VS_UBOS, 6712 ST_NEW_VS_SSBOS, 6713 ST_NEW_VS_ATOMICS); 6714 break; 6715 6716 case MESA_SHADER_TESS_CTRL: 6717 states = &((struct st_tessctrl_program*)prog)->affected_states; 6718 6719 *states = ST_NEW_TCS_STATE; 6720 6721 set_affected_state_flags(states, prog, 6722 ST_NEW_TCS_CONSTANTS, 6723 ST_NEW_TCS_SAMPLER_VIEWS, 6724 ST_NEW_RENDER_SAMPLERS, 6725 ST_NEW_TCS_IMAGES, 6726 ST_NEW_TCS_UBOS, 6727 ST_NEW_TCS_SSBOS, 6728 ST_NEW_TCS_ATOMICS); 6729 break; 6730 6731 case MESA_SHADER_TESS_EVAL: 6732 states = &((struct st_tesseval_program*)prog)->affected_states; 6733 6734 *states = ST_NEW_TES_STATE | 6735 ST_NEW_RASTERIZER; 6736 6737 set_affected_state_flags(states, prog, 6738 ST_NEW_TES_CONSTANTS, 6739 ST_NEW_TES_SAMPLER_VIEWS, 6740 ST_NEW_RENDER_SAMPLERS, 6741 ST_NEW_TES_IMAGES, 6742 ST_NEW_TES_UBOS, 6743 ST_NEW_TES_SSBOS, 6744 ST_NEW_TES_ATOMICS); 6745 break; 6746 6747 case MESA_SHADER_GEOMETRY: 6748 states = &((struct st_geometry_program*)prog)->affected_states; 6749 6750 *states = ST_NEW_GS_STATE | 6751 ST_NEW_RASTERIZER; 6752 6753 set_affected_state_flags(states, prog, 6754 ST_NEW_GS_CONSTANTS, 6755 ST_NEW_GS_SAMPLER_VIEWS, 6756 ST_NEW_RENDER_SAMPLERS, 6757 ST_NEW_GS_IMAGES, 6758 ST_NEW_GS_UBOS, 6759 ST_NEW_GS_SSBOS, 6760 ST_NEW_GS_ATOMICS); 6761 break; 6762 6763 case MESA_SHADER_FRAGMENT: 6764 states = &((struct st_fragment_program*)prog)->affected_states; 6765 6766 /* gl_FragCoord and glDrawPixels always use constants. */ 6767 *states = ST_NEW_FS_STATE | 6768 ST_NEW_SAMPLE_SHADING | 6769 ST_NEW_FS_CONSTANTS; 6770 6771 set_affected_state_flags(states, prog, 6772 ST_NEW_FS_CONSTANTS, 6773 ST_NEW_FS_SAMPLER_VIEWS, 6774 ST_NEW_RENDER_SAMPLERS, 6775 ST_NEW_FS_IMAGES, 6776 ST_NEW_FS_UBOS, 6777 ST_NEW_FS_SSBOS, 6778 ST_NEW_FS_ATOMICS); 6779 break; 6780 6781 case MESA_SHADER_COMPUTE: 6782 states = &((struct st_compute_program*)prog)->affected_states; 6783 6784 *states = ST_NEW_CS_STATE; 6785 6786 set_affected_state_flags(states, prog, 6787 ST_NEW_CS_CONSTANTS, 6788 ST_NEW_CS_SAMPLER_VIEWS, 6789 ST_NEW_CS_SAMPLERS, 6790 ST_NEW_CS_IMAGES, 6791 ST_NEW_CS_UBOS, 6792 ST_NEW_CS_SSBOS, 6793 ST_NEW_CS_ATOMICS); 6794 break; 6795 6796 default: 6797 unreachable("unhandled shader stage"); 6798 } 6799 } 6800 6801 return prog; 6802} 6803 6804/* See if there are unsupported control flow statements. */ 6805class ir_control_flow_info_visitor : public ir_hierarchical_visitor { 6806private: 6807 const struct gl_shader_compiler_options *options; 6808public: 6809 ir_control_flow_info_visitor(const struct gl_shader_compiler_options *options) 6810 : options(options), 6811 unsupported(false) 6812 { 6813 } 6814 6815 virtual ir_visitor_status visit_enter(ir_function *ir) 6816 { 6817 /* Other functions are skipped (same as glsl_to_tgsi). 

/* See if there are unsupported control flow statements. */
class ir_control_flow_info_visitor : public ir_hierarchical_visitor {
private:
   const struct gl_shader_compiler_options *options;
public:
   ir_control_flow_info_visitor(const struct gl_shader_compiler_options *options)
      : options(options),
        unsupported(false)
   {
   }

   virtual ir_visitor_status visit_enter(ir_function *ir)
   {
      /* Other functions are skipped (same as glsl_to_tgsi). */
      if (strcmp(ir->name, "main") == 0)
         return visit_continue;

      return visit_continue_with_parent;
   }

   virtual ir_visitor_status visit_enter(ir_call *ir)
   {
      if (!ir->callee->is_intrinsic()) {
         unsupported = true; /* it's a function call */
         return visit_stop;
      }
      return visit_continue;
   }

   virtual ir_visitor_status visit_enter(ir_return *ir)
   {
      if (options->EmitNoMainReturn) {
         unsupported = true;
         return visit_stop;
      }
      return visit_continue;
   }

   bool unsupported;
};

static bool
has_unsupported_control_flow(exec_list *ir,
                             const struct gl_shader_compiler_options *options)
{
   ir_control_flow_info_visitor visitor(options);
   visit_list_elements(&visitor, ir);
   return visitor.unsupported;
}

extern "C" {

/**
 * Link a shader.
 * Called via ctx->Driver.LinkShader()
 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
 * with code lowering and other optimizations.
 */
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
   struct pipe_screen *pscreen = ctx->st->pipe->screen;
   assert(prog->data->LinkStatus);

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
         continue;

      struct gl_linked_shader *shader = prog->_LinkedShaders[i];
      exec_list *ir = shader->ir;
      gl_shader_stage stage = shader->Stage;
      const struct gl_shader_compiler_options *options =
            &ctx->Const.ShaderCompilerOptions[stage];
      enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(stage);
      bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
                                                   PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
      bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
                                                   PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);
      unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget,
                                                        PIPE_SHADER_CAP_LOWER_IF_THRESHOLD);

      /* If there are forms of indirect addressing that the driver
       * cannot handle, perform the lowering pass.
       */
      if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
          options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
         lower_variable_index_to_cond_assign(stage, ir,
                                             options->EmitNoIndirectInput,
                                             options->EmitNoIndirectOutput,
                                             options->EmitNoIndirectTemp,
                                             options->EmitNoIndirectUniform);
      }

      if (ctx->Extensions.ARB_shading_language_packing) {
         unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
                               LOWER_UNPACK_SNORM_2x16 |
                               LOWER_PACK_UNORM_2x16 |
                               LOWER_UNPACK_UNORM_2x16 |
                               LOWER_PACK_SNORM_4x8 |
                               LOWER_UNPACK_SNORM_4x8 |
                               LOWER_UNPACK_UNORM_4x8 |
                               LOWER_PACK_UNORM_4x8;

         if (ctx->Extensions.ARB_gpu_shader5)
            lower_inst |= LOWER_PACK_USE_BFI |
                          LOWER_PACK_USE_BFE;
         if (!ctx->st->has_half_float_packing)
            lower_inst |= LOWER_PACK_HALF_2x16 |
                          LOWER_UNPACK_HALF_2x16;

         lower_packing_builtins(ir, lower_inst);
      }

      if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
         lower_offset_arrays(ir);
      do_mat_op_to_vec(ir);

      if (stage == MESA_SHADER_FRAGMENT)
         lower_blend_equation_advanced(shader);

      lower_instructions(ir,
                         MOD_TO_FLOOR |
                         FDIV_TO_MUL_RCP |
                         EXP_TO_EXP2 |
                         LOG_TO_LOG2 |
                         LDEXP_TO_ARITH |
                         (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
                         CARRY_TO_ARITH |
                         BORROW_TO_ARITH |
                         (have_dround ? 0 : DOPS_TO_DFRAC) |
                         (options->EmitNoPow ? POW_TO_EXP2 : 0) |
                         (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
                         (options->EmitNoSat ? SAT_TO_CLAMP : 0) |
                         /* Assume that if ARB_gpu_shader5 is not supported
                          * then all of the extended integer functions need
                          * lowering.  It may be necessary to add some caps
                          * for individual instructions.
                          */
                         (!ctx->Extensions.ARB_gpu_shader5
                          ? BIT_COUNT_TO_MATH |
                            EXTRACT_TO_SHIFTS |
                            INSERT_TO_SHIFTS |
                            REVERSE_TO_SHIFTS |
                            FIND_LSB_TO_FLOAT_CAST |
                            FIND_MSB_TO_FLOAT_CAST |
                            IMUL_HIGH_TO_MUL
                          : 0));

      do_vec_index_to_cond_assign(ir);
      lower_vector_insert(ir, true);
      lower_quadop_vector(ir, false);
      lower_noise(ir);
      if (options->MaxIfDepth == 0) {
         lower_discard(ir);
      }

      if (ctx->Const.GLSLOptimizeConservatively) {
         /* Do it once and repeat only if there's unsupported control flow. */
         do {
            do_common_optimization(ir, true, true, options,
                                   ctx->Const.NativeIntegers);
            lower_if_to_cond_assign((gl_shader_stage)i, ir,
                                    options->MaxIfDepth, if_threshold);
         } while (has_unsupported_control_flow(ir, options));
      } else {
         /* Repeat it until it stops making changes. */
         bool progress;
         do {
            progress = do_common_optimization(ir, true, true, options,
                                              ctx->Const.NativeIntegers);
            progress |= lower_if_to_cond_assign((gl_shader_stage)i, ir,
                                                options->MaxIfDepth, if_threshold);
         } while (progress);
      }

      validate_ir_tree(ir);
   }

   build_program_resource_list(ctx, prog);

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program *linked_prog;

      if (prog->_LinkedShaders[i] == NULL)
         continue;

      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);

      if (linked_prog) {
         if (!ctx->Driver.ProgramStringNotify(ctx,
                                              _mesa_shader_stage_to_program(i),
                                              linked_prog)) {
            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
                                    NULL);
            return GL_FALSE;
         }
      }
   }

   return GL_TRUE;
}

void
st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
                                const GLuint outputMapping[],
                                struct pipe_stream_output_info *so)
{
   struct gl_transform_feedback_info *info =
      glsl_to_tgsi->shader_program->xfb_program->sh.LinkedTransformFeedback;
   st_translate_stream_output_info2(info, outputMapping, so);
}

void
st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
                                 const GLuint outputMapping[],
                                 struct pipe_stream_output_info *so)
{
   unsigned i;

   for (i = 0; i < info->NumOutputs; i++) {
      so->output[i].register_index =
         outputMapping[info->Outputs[i].OutputRegister];
      so->output[i].start_component = info->Outputs[i].ComponentOffset;
      so->output[i].num_components = info->Outputs[i].NumComponents;
      so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so->output[i].dst_offset = info->Outputs[i].DstOffset;
      so->output[i].stream = info->Outputs[i].StreamId;
   }

   for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so->stride[i] = info->Buffers[i].Stride;
   }
   so->num_outputs = info->NumOutputs;
}

} /* extern "C" */