brw_fs.cpp revision df2aef0e197f9276f60a8e755260420c90841269
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44} 45#include "brw_fs.h" 46#include "../glsl/glsl_types.h" 47#include "../glsl/ir_optimization.h" 48#include "../glsl/ir_print_visitor.h" 49 50#define MAX_INSTRUCTION (1 << 30) 51static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 52 53struct gl_shader * 54brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 55{ 56 struct brw_shader *shader; 57 58 shader = rzalloc(NULL, struct brw_shader); 59 if (shader) { 60 shader->base.Type = type; 61 shader->base.Name = name; 62 _mesa_init_shader(ctx, &shader->base); 63 } 64 65 return &shader->base; 66} 67 68struct gl_shader_program * 69brw_new_shader_program(struct gl_context *ctx, GLuint name) 70{ 71 struct brw_shader_program *prog; 72 prog = rzalloc(NULL, struct brw_shader_program); 73 if (prog) { 74 prog->base.Name = name; 75 _mesa_init_shader_program(ctx, &prog->base); 76 } 77 return &prog->base; 78} 79 80GLboolean 81brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) 82{ 83 if (!_mesa_ir_compile_shader(ctx, shader)) 84 return GL_FALSE; 85 86 return GL_TRUE; 87} 88 89GLboolean 90brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 91{ 92 struct brw_context *brw = brw_context(ctx); 93 struct intel_context *intel = &brw->intel; 94 95 struct brw_shader *shader = 96 (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 97 if (shader != NULL) { 98 void *mem_ctx = ralloc_context(NULL); 99 bool progress; 100 101 if (shader->ir) 102 ralloc_free(shader->ir); 103 shader->ir = new(shader) exec_list; 104 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 105 106 do_mat_op_to_vec(shader->ir); 107 lower_instructions(shader->ir, 108 MOD_TO_FRACT | 109 DIV_TO_MUL_RCP | 110 SUB_TO_ADD_NEG | 111 EXP_TO_EXP2 | 112 LOG_TO_LOG2); 113 114 /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, 115 * if-statements need to be flattened. 
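    * lower_if_to_cond_assign() below rewrites any branch nested deeper than
    * 16 levels into predicated conditional assignments, so the generated
    * control flow never exceeds that hardware limit.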
116 */ 117 if (intel->gen < 6) 118 lower_if_to_cond_assign(shader->ir, 16); 119 120 do_lower_texture_projection(shader->ir); 121 do_vec_index_to_cond_assign(shader->ir); 122 brw_do_cubemap_normalize(shader->ir); 123 124 do { 125 progress = false; 126 127 brw_do_channel_expressions(shader->ir); 128 brw_do_vector_splitting(shader->ir); 129 130 progress = do_lower_jumps(shader->ir, true, true, 131 true, /* main return */ 132 false, /* continue */ 133 false /* loops */ 134 ) || progress; 135 136 progress = do_common_optimization(shader->ir, true, 32) || progress; 137 138 progress = lower_noise(shader->ir) || progress; 139 progress = 140 lower_variable_index_to_cond_assign(shader->ir, 141 GL_TRUE, /* input */ 142 GL_TRUE, /* output */ 143 GL_TRUE, /* temp */ 144 GL_TRUE /* uniform */ 145 ) || progress; 146 progress = lower_quadop_vector(shader->ir, false) || progress; 147 } while (progress); 148 149 validate_ir_tree(shader->ir); 150 151 reparent_ir(shader->ir, shader->ir); 152 ralloc_free(mem_ctx); 153 } 154 155 if (!_mesa_ir_link_shader(ctx, prog)) 156 return GL_FALSE; 157 158 return GL_TRUE; 159} 160 161static int 162type_size(const struct glsl_type *type) 163{ 164 unsigned int size, i; 165 166 switch (type->base_type) { 167 case GLSL_TYPE_UINT: 168 case GLSL_TYPE_INT: 169 case GLSL_TYPE_FLOAT: 170 case GLSL_TYPE_BOOL: 171 return type->components(); 172 case GLSL_TYPE_ARRAY: 173 return type_size(type->fields.array) * type->length; 174 case GLSL_TYPE_STRUCT: 175 size = 0; 176 for (i = 0; i < type->length; i++) { 177 size += type_size(type->fields.structure[i].type); 178 } 179 return size; 180 case GLSL_TYPE_SAMPLER: 181 /* Samplers take up no register space, since they're baked in at 182 * link time. 183 */ 184 return 0; 185 default: 186 assert(!"not reached"); 187 return 0; 188 } 189} 190 191/** 192 * Returns how many MRFs an FS opcode will write over. 193 * 194 * Note that this is not the 0 or 1 implied writes in an actual gen 195 * instruction -- the FS opcodes often generate MOVs in addition. 196 */ 197int 198fs_visitor::implied_mrf_writes(fs_inst *inst) 199{ 200 if (inst->mlen == 0) 201 return 0; 202 203 switch (inst->opcode) { 204 case FS_OPCODE_RCP: 205 case FS_OPCODE_RSQ: 206 case FS_OPCODE_SQRT: 207 case FS_OPCODE_EXP2: 208 case FS_OPCODE_LOG2: 209 case FS_OPCODE_SIN: 210 case FS_OPCODE_COS: 211 return 1; 212 case FS_OPCODE_POW: 213 return 2; 214 case FS_OPCODE_TEX: 215 case FS_OPCODE_TXB: 216 case FS_OPCODE_TXL: 217 return 1; 218 case FS_OPCODE_FB_WRITE: 219 return 2; 220 case FS_OPCODE_PULL_CONSTANT_LOAD: 221 case FS_OPCODE_UNSPILL: 222 return 1; 223 case FS_OPCODE_SPILL: 224 return 2; 225 default: 226 assert(!"not reached"); 227 return inst->mlen; 228 } 229} 230 231int 232fs_visitor::virtual_grf_alloc(int size) 233{ 234 if (virtual_grf_array_size <= virtual_grf_next) { 235 if (virtual_grf_array_size == 0) 236 virtual_grf_array_size = 16; 237 else 238 virtual_grf_array_size *= 2; 239 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, 240 virtual_grf_array_size); 241 242 /* This slot is always unused. */ 243 virtual_grf_sizes[0] = 0; 244 } 245 virtual_grf_sizes[virtual_grf_next] = size; 246 return virtual_grf_next++; 247} 248 249/** Fixed HW reg constructor. */ 250fs_reg::fs_reg(enum register_file file, int hw_reg) 251{ 252 init(); 253 this->file = file; 254 this->hw_reg = hw_reg; 255 this->type = BRW_REGISTER_TYPE_F; 256} 257 258/** Fixed HW reg constructor. 
*/ 259fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) 260{ 261 init(); 262 this->file = file; 263 this->hw_reg = hw_reg; 264 this->type = type; 265} 266 267int 268brw_type_for_base_type(const struct glsl_type *type) 269{ 270 switch (type->base_type) { 271 case GLSL_TYPE_FLOAT: 272 return BRW_REGISTER_TYPE_F; 273 case GLSL_TYPE_INT: 274 case GLSL_TYPE_BOOL: 275 return BRW_REGISTER_TYPE_D; 276 case GLSL_TYPE_UINT: 277 return BRW_REGISTER_TYPE_UD; 278 case GLSL_TYPE_ARRAY: 279 case GLSL_TYPE_STRUCT: 280 case GLSL_TYPE_SAMPLER: 281 /* These should be overridden with the type of the member when 282 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 283 * way to trip up if we don't. 284 */ 285 return BRW_REGISTER_TYPE_UD; 286 default: 287 assert(!"not reached"); 288 return BRW_REGISTER_TYPE_F; 289 } 290} 291 292/** Automatic reg constructor. */ 293fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 294{ 295 init(); 296 297 this->file = GRF; 298 this->reg = v->virtual_grf_alloc(type_size(type)); 299 this->reg_offset = 0; 300 this->type = brw_type_for_base_type(type); 301} 302 303fs_reg * 304fs_visitor::variable_storage(ir_variable *var) 305{ 306 return (fs_reg *)hash_table_find(this->variable_ht, var); 307} 308 309/* Our support for uniforms is piggy-backed on the struct 310 * gl_fragment_program, because that's where the values actually 311 * get stored, rather than in some global gl_shader_program uniform 312 * store. 313 */ 314int 315fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 316{ 317 unsigned int offset = 0; 318 319 if (type->is_matrix()) { 320 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 321 type->vector_elements, 322 1); 323 324 for (unsigned int i = 0; i < type->matrix_columns; i++) { 325 offset += setup_uniform_values(loc + offset, column); 326 } 327 328 return offset; 329 } 330 331 switch (type->base_type) { 332 case GLSL_TYPE_FLOAT: 333 case GLSL_TYPE_UINT: 334 case GLSL_TYPE_INT: 335 case GLSL_TYPE_BOOL: 336 for (unsigned int i = 0; i < type->vector_elements; i++) { 337 unsigned int param = c->prog_data.nr_params++; 338 339 assert(param < ARRAY_SIZE(c->prog_data.param)); 340 341 switch (type->base_type) { 342 case GLSL_TYPE_FLOAT: 343 c->prog_data.param_convert[param] = PARAM_NO_CONVERT; 344 break; 345 case GLSL_TYPE_UINT: 346 c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; 347 break; 348 case GLSL_TYPE_INT: 349 c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; 350 break; 351 case GLSL_TYPE_BOOL: 352 c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; 353 break; 354 default: 355 assert(!"not reached"); 356 c->prog_data.param_convert[param] = PARAM_NO_CONVERT; 357 break; 358 } 359 this->param_index[param] = loc; 360 this->param_offset[param] = i; 361 } 362 return 1; 363 364 case GLSL_TYPE_STRUCT: 365 for (unsigned int i = 0; i < type->length; i++) { 366 offset += setup_uniform_values(loc + offset, 367 type->fields.structure[i].type); 368 } 369 return offset; 370 371 case GLSL_TYPE_ARRAY: 372 for (unsigned int i = 0; i < type->length; i++) { 373 offset += setup_uniform_values(loc + offset, type->fields.array); 374 } 375 return offset; 376 377 case GLSL_TYPE_SAMPLER: 378 /* The sampler takes up a slot, but we don't use any values from it. */ 379 return 1; 380 381 default: 382 assert(!"not reached"); 383 return 0; 384 } 385} 386 387 388/* Our support for builtin uniforms is even scarier than non-builtin. 
389 * It sits on top of the PROG_STATE_VAR parameters that are 390 * automatically updated from GL context state. 391 */ 392void 393fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 394{ 395 const struct gl_builtin_uniform_desc *statevar = NULL; 396 397 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 398 statevar = &_mesa_builtin_uniform_desc[i]; 399 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 400 break; 401 } 402 403 if (!statevar->name) { 404 this->fail = true; 405 printf("Failed to find builtin uniform `%s'\n", ir->name); 406 return; 407 } 408 409 int array_count; 410 if (ir->type->is_array()) { 411 array_count = ir->type->length; 412 } else { 413 array_count = 1; 414 } 415 416 for (int a = 0; a < array_count; a++) { 417 for (unsigned int i = 0; i < statevar->num_elements; i++) { 418 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 419 int tokens[STATE_LENGTH]; 420 421 memcpy(tokens, element->tokens, sizeof(element->tokens)); 422 if (ir->type->is_array()) { 423 tokens[1] = a; 424 } 425 426 /* This state reference has already been setup by ir_to_mesa, 427 * but we'll get the same index back here. 428 */ 429 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 430 (gl_state_index *)tokens); 431 432 /* Add each of the unique swizzles of the element as a 433 * parameter. This'll end up matching the expected layout of 434 * the array/matrix/structure we're trying to fill in. 435 */ 436 int last_swiz = -1; 437 for (unsigned int i = 0; i < 4; i++) { 438 int swiz = GET_SWZ(element->swizzle, i); 439 if (swiz == last_swiz) 440 break; 441 last_swiz = swiz; 442 443 c->prog_data.param_convert[c->prog_data.nr_params] = 444 PARAM_NO_CONVERT; 445 this->param_index[c->prog_data.nr_params] = index; 446 this->param_offset[c->prog_data.nr_params] = swiz; 447 c->prog_data.nr_params++; 448 } 449 } 450 } 451} 452 453fs_reg * 454fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 455{ 456 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 457 fs_reg wpos = *reg; 458 fs_reg neg_y = this->pixel_y; 459 neg_y.negate = true; 460 bool flip = !ir->origin_upper_left ^ c->key.render_to_fbo; 461 462 /* gl_FragCoord.x */ 463 if (ir->pixel_center_integer) { 464 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 465 } else { 466 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 467 } 468 wpos.reg_offset++; 469 470 /* gl_FragCoord.y */ 471 if (!flip && ir->pixel_center_integer) { 472 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 473 } else { 474 fs_reg pixel_y = this->pixel_y; 475 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 476 477 if (flip) { 478 pixel_y.negate = true; 479 offset += c->key.drawable_height - 1.0; 480 } 481 482 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 483 } 484 wpos.reg_offset++; 485 486 /* gl_FragCoord.z */ 487 if (intel->gen >= 6) { 488 emit(fs_inst(BRW_OPCODE_MOV, wpos, 489 fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); 490 } else { 491 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 492 interp_reg(FRAG_ATTRIB_WPOS, 2))); 493 } 494 wpos.reg_offset++; 495 496 /* gl_FragCoord.w: Already set up in emit_interpolation */ 497 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 498 499 return reg; 500} 501 502fs_reg * 503fs_visitor::emit_general_interpolation(ir_variable *ir) 504{ 505 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 506 /* Interpolation is always in floating point regs. 
*/ 507 reg->type = BRW_REGISTER_TYPE_F; 508 fs_reg attr = *reg; 509 510 unsigned int array_elements; 511 const glsl_type *type; 512 513 if (ir->type->is_array()) { 514 array_elements = ir->type->length; 515 if (array_elements == 0) { 516 this->fail = true; 517 } 518 type = ir->type->fields.array; 519 } else { 520 array_elements = 1; 521 type = ir->type; 522 } 523 524 int location = ir->location; 525 for (unsigned int i = 0; i < array_elements; i++) { 526 for (unsigned int j = 0; j < type->matrix_columns; j++) { 527 if (urb_setup[location] == -1) { 528 /* If there's no incoming setup data for this slot, don't 529 * emit interpolation for it. 530 */ 531 attr.reg_offset += type->vector_elements; 532 location++; 533 continue; 534 } 535 536 if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 || 537 location == FRAG_ATTRIB_COL1)) { 538 /* Constant interpolation (flat shading) case. The SF has 539 * handed us defined values in only the constant offset 540 * field of the setup reg. 541 */ 542 for (unsigned int c = 0; c < type->vector_elements; c++) { 543 struct brw_reg interp = interp_reg(location, c); 544 interp = suboffset(interp, 3); 545 emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp))); 546 attr.reg_offset++; 547 } 548 } else { 549 /* Perspective interpolation case. */ 550 for (unsigned int c = 0; c < type->vector_elements; c++) { 551 struct brw_reg interp = interp_reg(location, c); 552 emit(fs_inst(FS_OPCODE_LINTERP, 553 attr, 554 this->delta_x, 555 this->delta_y, 556 fs_reg(interp))); 557 attr.reg_offset++; 558 } 559 560 if (intel->gen < 6) { 561 attr.reg_offset -= type->vector_elements; 562 for (unsigned int c = 0; c < type->vector_elements; c++) { 563 emit(fs_inst(BRW_OPCODE_MUL, 564 attr, 565 attr, 566 this->pixel_w)); 567 attr.reg_offset++; 568 } 569 } 570 } 571 location++; 572 } 573 } 574 575 return reg; 576} 577 578fs_reg * 579fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) 580{ 581 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 582 583 /* The frontfacing comes in as a bit in the thread payload. */ 584 if (intel->gen >= 6) { 585 emit(fs_inst(BRW_OPCODE_ASR, 586 *reg, 587 fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), 588 fs_reg(15))); 589 emit(fs_inst(BRW_OPCODE_NOT, 590 *reg, 591 *reg)); 592 emit(fs_inst(BRW_OPCODE_AND, 593 *reg, 594 *reg, 595 fs_reg(1))); 596 } else { 597 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 598 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 599 * us front face 600 */ 601 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 602 *reg, 603 fs_reg(r1_6ud), 604 fs_reg(1u << 31))); 605 inst->conditional_mod = BRW_CONDITIONAL_L; 606 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 607 } 608 609 return reg; 610} 611 612fs_inst * 613fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) 614{ 615 switch (opcode) { 616 case FS_OPCODE_RCP: 617 case FS_OPCODE_RSQ: 618 case FS_OPCODE_SQRT: 619 case FS_OPCODE_EXP2: 620 case FS_OPCODE_LOG2: 621 case FS_OPCODE_SIN: 622 case FS_OPCODE_COS: 623 break; 624 default: 625 assert(!"not reached: bad math opcode"); 626 return NULL; 627 } 628 629 /* Can't do hstride == 0 args to gen6 math, so expand it out. We 630 * might be able to do better by doing execsize = 1 math and then 631 * expanding that result out, but we would need to be careful with 632 * masking. 633 * 634 * The hardware ignores source modifiers (negate and abs) on math 635 * instructions, so we also move to a temp to set those up. 
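    * Hence the check below: a UNIFORM source, or one carrying abs/negate, is
    * first copied into a fresh float GRF, and the math instruction then
    * consumes that temporary instead.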
636 */ 637 if (intel->gen >= 6 && (src.file == UNIFORM || 638 src.abs || 639 src.negate)) { 640 fs_reg expanded = fs_reg(this, glsl_type::float_type); 641 emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); 642 src = expanded; 643 } 644 645 fs_inst *inst = emit(fs_inst(opcode, dst, src)); 646 647 if (intel->gen < 6) { 648 inst->base_mrf = 2; 649 inst->mlen = 1; 650 } 651 652 return inst; 653} 654 655fs_inst * 656fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) 657{ 658 int base_mrf = 2; 659 fs_inst *inst; 660 661 assert(opcode == FS_OPCODE_POW); 662 663 if (intel->gen >= 6) { 664 /* Can't do hstride == 0 args to gen6 math, so expand it out. 665 * 666 * The hardware ignores source modifiers (negate and abs) on math 667 * instructions, so we also move to a temp to set those up. 668 */ 669 if (src0.file == UNIFORM || src0.abs || src0.negate) { 670 fs_reg expanded = fs_reg(this, glsl_type::float_type); 671 emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); 672 src0 = expanded; 673 } 674 675 if (src1.file == UNIFORM || src1.abs || src1.negate) { 676 fs_reg expanded = fs_reg(this, glsl_type::float_type); 677 emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); 678 src1 = expanded; 679 } 680 681 inst = emit(fs_inst(opcode, dst, src0, src1)); 682 } else { 683 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1)); 684 inst = emit(fs_inst(opcode, dst, src0, reg_null_f)); 685 686 inst->base_mrf = base_mrf; 687 inst->mlen = 2; 688 } 689 return inst; 690} 691 692void 693fs_visitor::visit(ir_variable *ir) 694{ 695 fs_reg *reg = NULL; 696 697 if (variable_storage(ir)) 698 return; 699 700 if (strcmp(ir->name, "gl_FragColor") == 0) { 701 this->frag_color = ir; 702 } else if (strcmp(ir->name, "gl_FragData") == 0) { 703 this->frag_data = ir; 704 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 705 this->frag_depth = ir; 706 } 707 708 if (ir->mode == ir_var_in) { 709 if (!strcmp(ir->name, "gl_FragCoord")) { 710 reg = emit_fragcoord_interpolation(ir); 711 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 712 reg = emit_frontfacing_interpolation(ir); 713 } else { 714 reg = emit_general_interpolation(ir); 715 } 716 assert(reg); 717 hash_table_insert(this->variable_ht, reg, ir); 718 return; 719 } 720 721 if (ir->mode == ir_var_uniform) { 722 int param_index = c->prog_data.nr_params; 723 724 if (!strncmp(ir->name, "gl_", 3)) { 725 setup_builtin_uniform_values(ir); 726 } else { 727 setup_uniform_values(ir->location, ir->type); 728 } 729 730 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 731 reg->type = brw_type_for_base_type(ir->type); 732 } 733 734 if (!reg) 735 reg = new(this->mem_ctx) fs_reg(this, ir->type); 736 737 hash_table_insert(this->variable_ht, reg, ir); 738} 739 740void 741fs_visitor::visit(ir_dereference_variable *ir) 742{ 743 fs_reg *reg = variable_storage(ir->var); 744 this->result = *reg; 745} 746 747void 748fs_visitor::visit(ir_dereference_record *ir) 749{ 750 const glsl_type *struct_type = ir->record->type; 751 752 ir->record->accept(this); 753 754 unsigned int offset = 0; 755 for (unsigned int i = 0; i < struct_type->length; i++) { 756 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 757 break; 758 offset += type_size(struct_type->fields.structure[i].type); 759 } 760 this->result.reg_offset += offset; 761 this->result.type = brw_type_for_base_type(ir->type); 762} 763 764void 765fs_visitor::visit(ir_dereference_array *ir) 766{ 767 ir_constant *index; 768 int element_size; 769 770 ir->array->accept(this); 771 index = 
ir->array_index->as_constant(); 772 773 element_size = type_size(ir->type); 774 this->result.type = brw_type_for_base_type(ir->type); 775 776 if (index) { 777 assert(this->result.file == UNIFORM || 778 (this->result.file == GRF && 779 this->result.reg != 0)); 780 this->result.reg_offset += index->value.i[0] * element_size; 781 } else { 782 assert(!"FINISHME: non-constant array element"); 783 } 784} 785 786/* Instruction selection: Produce a MOV.sat instead of 787 * MIN(MAX(val, 0), 1) when possible. 788 */ 789bool 790fs_visitor::try_emit_saturate(ir_expression *ir) 791{ 792 ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); 793 794 if (!sat_val) 795 return false; 796 797 sat_val->accept(this); 798 fs_reg src = this->result; 799 800 this->result = fs_reg(this, ir->type); 801 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, src)); 802 inst->saturate = true; 803 804 return true; 805} 806 807static uint32_t 808brw_conditional_for_comparison(unsigned int op) 809{ 810 switch (op) { 811 case ir_binop_less: 812 return BRW_CONDITIONAL_L; 813 case ir_binop_greater: 814 return BRW_CONDITIONAL_G; 815 case ir_binop_lequal: 816 return BRW_CONDITIONAL_LE; 817 case ir_binop_gequal: 818 return BRW_CONDITIONAL_GE; 819 case ir_binop_equal: 820 case ir_binop_all_equal: /* same as equal for scalars */ 821 return BRW_CONDITIONAL_Z; 822 case ir_binop_nequal: 823 case ir_binop_any_nequal: /* same as nequal for scalars */ 824 return BRW_CONDITIONAL_NZ; 825 default: 826 assert(!"not reached: bad operation for comparison"); 827 return BRW_CONDITIONAL_NZ; 828 } 829} 830 831void 832fs_visitor::visit(ir_expression *ir) 833{ 834 unsigned int operand; 835 fs_reg op[2], temp; 836 fs_inst *inst; 837 838 assert(ir->get_num_operands() <= 2); 839 840 if (try_emit_saturate(ir)) 841 return; 842 843 for (operand = 0; operand < ir->get_num_operands(); operand++) { 844 ir->operands[operand]->accept(this); 845 if (this->result.file == BAD_FILE) { 846 ir_print_visitor v; 847 printf("Failed to get tree for expression operand:\n"); 848 ir->operands[operand]->accept(&v); 849 this->fail = true; 850 } 851 op[operand] = this->result; 852 853 /* Matrix expression operands should have been broken down to vector 854 * operations already. 855 */ 856 assert(!ir->operands[operand]->type->is_matrix()); 857 /* And then those vector operands should have been broken down to scalar. 858 */ 859 assert(!ir->operands[operand]->type->is_vector()); 860 } 861 862 /* Storage for our result. If our result goes into an assignment, it will 863 * just get copy-propagated out, so no worries. 864 */ 865 this->result = fs_reg(this, ir->type); 866 867 switch (ir->operation) { 868 case ir_unop_logic_not: 869 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 870 * ones complement of the whole register, not just bit 0. 
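    * (NOT of a boolean stored as 0/1 would give 0xffffffff/0xfffffffe, both
    * of which still read as "true"; XOR with 1 flips only the low bit, which
    * is what the emit below relies on.)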
871 */ 872 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1))); 873 break; 874 case ir_unop_neg: 875 op[0].negate = !op[0].negate; 876 this->result = op[0]; 877 break; 878 case ir_unop_abs: 879 op[0].abs = true; 880 op[0].negate = false; 881 this->result = op[0]; 882 break; 883 case ir_unop_sign: 884 temp = fs_reg(this, ir->type); 885 886 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 887 888 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 889 inst->conditional_mod = BRW_CONDITIONAL_G; 890 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 891 inst->predicated = true; 892 893 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 894 inst->conditional_mod = BRW_CONDITIONAL_L; 895 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 896 inst->predicated = true; 897 898 break; 899 case ir_unop_rcp: 900 emit_math(FS_OPCODE_RCP, this->result, op[0]); 901 break; 902 903 case ir_unop_exp2: 904 emit_math(FS_OPCODE_EXP2, this->result, op[0]); 905 break; 906 case ir_unop_log2: 907 emit_math(FS_OPCODE_LOG2, this->result, op[0]); 908 break; 909 case ir_unop_exp: 910 case ir_unop_log: 911 assert(!"not reached: should be handled by ir_explog_to_explog2"); 912 break; 913 case ir_unop_sin: 914 case ir_unop_sin_reduced: 915 emit_math(FS_OPCODE_SIN, this->result, op[0]); 916 break; 917 case ir_unop_cos: 918 case ir_unop_cos_reduced: 919 emit_math(FS_OPCODE_COS, this->result, op[0]); 920 break; 921 922 case ir_unop_dFdx: 923 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 924 break; 925 case ir_unop_dFdy: 926 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 927 break; 928 929 case ir_binop_add: 930 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 931 break; 932 case ir_binop_sub: 933 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 934 break; 935 936 case ir_binop_mul: 937 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 938 break; 939 case ir_binop_div: 940 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 941 break; 942 case ir_binop_mod: 943 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 944 break; 945 946 case ir_binop_less: 947 case ir_binop_greater: 948 case ir_binop_lequal: 949 case ir_binop_gequal: 950 case ir_binop_equal: 951 case ir_binop_all_equal: 952 case ir_binop_nequal: 953 case ir_binop_any_nequal: 954 temp = this->result; 955 /* original gen4 does implicit conversion before comparison. 
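    * To compensate, the destination type is forced to match op[0] below, so
    * the CMP is performed in the operands' own type rather than in float.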
*/ 956 if (intel->gen < 5) 957 temp.type = op[0].type; 958 959 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1])); 960 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 961 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 962 break; 963 964 case ir_binop_logic_xor: 965 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 966 break; 967 968 case ir_binop_logic_or: 969 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 970 break; 971 972 case ir_binop_logic_and: 973 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 974 break; 975 976 case ir_binop_dot: 977 case ir_unop_any: 978 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 979 break; 980 981 case ir_unop_noise: 982 assert(!"not reached: should be handled by lower_noise"); 983 break; 984 985 case ir_quadop_vector: 986 assert(!"not reached: should be handled by lower_quadop_vector"); 987 break; 988 989 case ir_unop_sqrt: 990 emit_math(FS_OPCODE_SQRT, this->result, op[0]); 991 break; 992 993 case ir_unop_rsq: 994 emit_math(FS_OPCODE_RSQ, this->result, op[0]); 995 break; 996 997 case ir_unop_i2f: 998 case ir_unop_b2f: 999 case ir_unop_b2i: 1000 case ir_unop_f2i: 1001 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 1002 break; 1003 case ir_unop_f2b: 1004 case ir_unop_i2b: 1005 temp = this->result; 1006 /* original gen4 does implicit conversion before comparison. */ 1007 if (intel->gen < 5) 1008 temp.type = op[0].type; 1009 1010 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f))); 1011 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1012 inst = emit(fs_inst(BRW_OPCODE_AND, this->result, 1013 this->result, fs_reg(1))); 1014 break; 1015 1016 case ir_unop_trunc: 1017 emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0])); 1018 break; 1019 case ir_unop_ceil: 1020 op[0].negate = !op[0].negate; 1021 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1022 this->result.negate = true; 1023 break; 1024 case ir_unop_floor: 1025 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1026 break; 1027 case ir_unop_fract: 1028 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 1029 break; 1030 case ir_unop_round_even: 1031 emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0])); 1032 break; 1033 1034 case ir_binop_min: 1035 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1036 inst->conditional_mod = BRW_CONDITIONAL_L; 1037 1038 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1039 inst->predicated = true; 1040 break; 1041 case ir_binop_max: 1042 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1043 inst->conditional_mod = BRW_CONDITIONAL_G; 1044 1045 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1046 inst->predicated = true; 1047 break; 1048 1049 case ir_binop_pow: 1050 emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); 1051 break; 1052 1053 case ir_unop_bit_not: 1054 inst = emit(fs_inst(BRW_OPCODE_NOT, this->result, op[0])); 1055 break; 1056 case ir_binop_bit_and: 1057 inst = emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 1058 break; 1059 case ir_binop_bit_xor: 1060 inst = emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 1061 break; 1062 case ir_binop_bit_or: 1063 inst = emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 1064 break; 1065 1066 case ir_unop_u2f: 1067 case ir_binop_lshift: 1068 case ir_binop_rshift: 1069 assert(!"GLSL 1.30 features unsupported"); 1070 break; 1071 } 1072} 1073 1074void 
1075fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 1076 const glsl_type *type, bool predicated) 1077{ 1078 switch (type->base_type) { 1079 case GLSL_TYPE_FLOAT: 1080 case GLSL_TYPE_UINT: 1081 case GLSL_TYPE_INT: 1082 case GLSL_TYPE_BOOL: 1083 for (unsigned int i = 0; i < type->components(); i++) { 1084 l.type = brw_type_for_base_type(type); 1085 r.type = brw_type_for_base_type(type); 1086 1087 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1088 inst->predicated = predicated; 1089 1090 l.reg_offset++; 1091 r.reg_offset++; 1092 } 1093 break; 1094 case GLSL_TYPE_ARRAY: 1095 for (unsigned int i = 0; i < type->length; i++) { 1096 emit_assignment_writes(l, r, type->fields.array, predicated); 1097 } 1098 break; 1099 1100 case GLSL_TYPE_STRUCT: 1101 for (unsigned int i = 0; i < type->length; i++) { 1102 emit_assignment_writes(l, r, type->fields.structure[i].type, 1103 predicated); 1104 } 1105 break; 1106 1107 case GLSL_TYPE_SAMPLER: 1108 break; 1109 1110 default: 1111 assert(!"not reached"); 1112 break; 1113 } 1114} 1115 1116void 1117fs_visitor::visit(ir_assignment *ir) 1118{ 1119 struct fs_reg l, r; 1120 fs_inst *inst; 1121 1122 /* FINISHME: arrays on the lhs */ 1123 ir->lhs->accept(this); 1124 l = this->result; 1125 1126 ir->rhs->accept(this); 1127 r = this->result; 1128 1129 assert(l.file != BAD_FILE); 1130 assert(r.file != BAD_FILE); 1131 1132 if (ir->condition) { 1133 emit_bool_to_cond_code(ir->condition); 1134 } 1135 1136 if (ir->lhs->type->is_scalar() || 1137 ir->lhs->type->is_vector()) { 1138 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 1139 if (ir->write_mask & (1 << i)) { 1140 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1141 if (ir->condition) 1142 inst->predicated = true; 1143 r.reg_offset++; 1144 } 1145 l.reg_offset++; 1146 } 1147 } else { 1148 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 1149 } 1150} 1151 1152fs_inst * 1153fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1154{ 1155 int mlen; 1156 int base_mrf = 1; 1157 bool simd16 = false; 1158 fs_reg orig_dst; 1159 1160 /* g0 header. */ 1161 mlen = 1; 1162 1163 if (ir->shadow_comparitor) { 1164 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1165 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1166 coordinate)); 1167 coordinate.reg_offset++; 1168 } 1169 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1170 mlen += 3; 1171 1172 if (ir->op == ir_tex) { 1173 /* There's no plain shadow compare message, so we use shadow 1174 * compare with a bias of 0.0. 1175 */ 1176 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1177 fs_reg(0.0f))); 1178 mlen++; 1179 } else if (ir->op == ir_txb) { 1180 ir->lod_info.bias->accept(this); 1181 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1182 this->result)); 1183 mlen++; 1184 } else { 1185 assert(ir->op == ir_txl); 1186 ir->lod_info.lod->accept(this); 1187 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1188 this->result)); 1189 mlen++; 1190 } 1191 1192 ir->shadow_comparitor->accept(this); 1193 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1194 mlen++; 1195 } else if (ir->op == ir_tex) { 1196 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1197 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1198 coordinate)); 1199 coordinate.reg_offset++; 1200 } 1201 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1202 mlen += 3; 1203 } else { 1204 /* Oh joy. 
gen4 doesn't have SIMD8 non-shadow-compare bias/lod 1205 * instructions. We'll need to do SIMD16 here. 1206 */ 1207 assert(ir->op == ir_txb || ir->op == ir_txl); 1208 1209 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1210 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), 1211 coordinate)); 1212 coordinate.reg_offset++; 1213 } 1214 1215 /* lod/bias appears after u/v/r. */ 1216 mlen += 6; 1217 1218 if (ir->op == ir_txb) { 1219 ir->lod_info.bias->accept(this); 1220 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1221 this->result)); 1222 mlen++; 1223 } else { 1224 ir->lod_info.lod->accept(this); 1225 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1226 this->result)); 1227 mlen++; 1228 } 1229 1230 /* The unused upper half. */ 1231 mlen++; 1232 1233 /* Now, since we're doing simd16, the return is 2 interleaved 1234 * vec4s where the odd-indexed ones are junk. We'll need to move 1235 * this weirdness around to the expected layout. 1236 */ 1237 simd16 = true; 1238 orig_dst = dst; 1239 dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 1240 2)); 1241 dst.type = BRW_REGISTER_TYPE_F; 1242 } 1243 1244 fs_inst *inst = NULL; 1245 switch (ir->op) { 1246 case ir_tex: 1247 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1248 break; 1249 case ir_txb: 1250 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1251 break; 1252 case ir_txl: 1253 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1254 break; 1255 case ir_txd: 1256 case ir_txf: 1257 assert(!"GLSL 1.30 features unsupported"); 1258 break; 1259 } 1260 inst->base_mrf = base_mrf; 1261 inst->mlen = mlen; 1262 1263 if (simd16) { 1264 for (int i = 0; i < 4; i++) { 1265 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst)); 1266 orig_dst.reg_offset++; 1267 dst.reg_offset += 2; 1268 } 1269 } 1270 1271 return inst; 1272} 1273 1274fs_inst * 1275fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1276{ 1277 /* gen5's SIMD8 sampler has slots for u, v, r, array index, then 1278 * optional parameters like shadow comparitor or LOD bias. If 1279 * optional parameters aren't present, those base slots are 1280 * optional and don't need to be included in the message. 1281 * 1282 * We don't fill in the unnecessary slots regardless, which may 1283 * look surprising in the disassembly. 1284 */ 1285 int mlen = 1; /* g0 header always present. 
*/ 1286 int base_mrf = 1; 1287 1288 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1289 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1290 coordinate)); 1291 coordinate.reg_offset++; 1292 } 1293 mlen += ir->coordinate->type->vector_elements; 1294 1295 if (ir->shadow_comparitor) { 1296 mlen = MAX2(mlen, 5); 1297 1298 ir->shadow_comparitor->accept(this); 1299 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1300 mlen++; 1301 } 1302 1303 fs_inst *inst = NULL; 1304 switch (ir->op) { 1305 case ir_tex: 1306 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1307 break; 1308 case ir_txb: 1309 ir->lod_info.bias->accept(this); 1310 mlen = MAX2(mlen, 5); 1311 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1312 mlen++; 1313 1314 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1315 break; 1316 case ir_txl: 1317 ir->lod_info.lod->accept(this); 1318 mlen = MAX2(mlen, 5); 1319 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1320 mlen++; 1321 1322 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1323 break; 1324 case ir_txd: 1325 case ir_txf: 1326 assert(!"GLSL 1.30 features unsupported"); 1327 break; 1328 } 1329 inst->base_mrf = base_mrf; 1330 inst->mlen = mlen; 1331 1332 return inst; 1333} 1334 1335void 1336fs_visitor::visit(ir_texture *ir) 1337{ 1338 int sampler; 1339 fs_inst *inst = NULL; 1340 1341 ir->coordinate->accept(this); 1342 fs_reg coordinate = this->result; 1343 1344 if (ir->offset != NULL) { 1345 ir_constant *offset = ir->offset->as_constant(); 1346 assert(offset != NULL); 1347 1348 signed char offsets[3]; 1349 for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) 1350 offsets[i] = (signed char) offset->value.i[i]; 1351 1352 /* Combine all three offsets into a single unsigned dword: 1353 * 1354 * bits 11:8 - U Offset (X component) 1355 * bits 7:4 - V Offset (Y component) 1356 * bits 3:0 - R Offset (Z component) 1357 */ 1358 unsigned offset_bits = 0; 1359 for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) { 1360 const unsigned shift = 4 * (2 - i); 1361 offset_bits |= (offsets[i] << shift) & (0xF << shift); 1362 } 1363 1364 /* Explicitly set up the message header by copying g0 to msg reg m1. */ 1365 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), 1366 fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD))); 1367 1368 /* Then set the offset bits in DWord 2 of the message header. */ 1369 emit(fs_inst(BRW_OPCODE_MOV, 1370 fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), 1371 BRW_REGISTER_TYPE_UD)), 1372 fs_reg(brw_imm_uw(offset_bits)))); 1373 } 1374 1375 /* Should be lowered by do_lower_texture_projection */ 1376 assert(!ir->projector); 1377 1378 sampler = _mesa_get_sampler_uniform_value(ir->sampler, 1379 ctx->Shader.CurrentFragmentProgram, 1380 &brw->fragment_program->Base); 1381 sampler = c->fp->program.Base.SamplerUnits[sampler]; 1382 1383 /* The 965 requires the EU to do the normalization of GL rectangle 1384 * texture coordinates. We use the program parameter state 1385 * tracking to get the scaling factor. 
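    * The two scale factors are fetched through a STATE_INTERNAL /
    * STATE_TEXRECT_SCALE parameter reference and multiplied into the
    * incoming coordinate before the sample message is built.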
1386 */ 1387 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1388 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1389 int tokens[STATE_LENGTH] = { 1390 STATE_INTERNAL, 1391 STATE_TEXRECT_SCALE, 1392 sampler, 1393 0, 1394 0 1395 }; 1396 1397 c->prog_data.param_convert[c->prog_data.nr_params] = 1398 PARAM_NO_CONVERT; 1399 c->prog_data.param_convert[c->prog_data.nr_params + 1] = 1400 PARAM_NO_CONVERT; 1401 1402 fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1403 fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1404 GLuint index = _mesa_add_state_reference(params, 1405 (gl_state_index *)tokens); 1406 1407 this->param_index[c->prog_data.nr_params] = index; 1408 this->param_offset[c->prog_data.nr_params] = 0; 1409 c->prog_data.nr_params++; 1410 this->param_index[c->prog_data.nr_params] = index; 1411 this->param_offset[c->prog_data.nr_params] = 1; 1412 c->prog_data.nr_params++; 1413 1414 fs_reg dst = fs_reg(this, ir->coordinate->type); 1415 fs_reg src = coordinate; 1416 coordinate = dst; 1417 1418 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x)); 1419 dst.reg_offset++; 1420 src.reg_offset++; 1421 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y)); 1422 } 1423 1424 /* Writemasking doesn't eliminate channels on SIMD8 texture 1425 * samples, so don't worry about them. 1426 */ 1427 fs_reg dst = fs_reg(this, glsl_type::vec4_type); 1428 1429 if (intel->gen < 5) { 1430 inst = emit_texture_gen4(ir, dst, coordinate); 1431 } else { 1432 inst = emit_texture_gen5(ir, dst, coordinate); 1433 } 1434 1435 /* If there's an offset, we already set up m1. To avoid the implied move, 1436 * use the null register. Otherwise, we want an implied move from g0. 1437 */ 1438 if (ir->offset != NULL) 1439 inst->src[0] = fs_reg(brw_null_reg()); 1440 else 1441 inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 1442 1443 inst->sampler = sampler; 1444 1445 this->result = dst; 1446 1447 if (ir->shadow_comparitor) 1448 inst->shadow_compare = true; 1449 1450 if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { 1451 fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); 1452 1453 for (int i = 0; i < 4; i++) { 1454 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1455 fs_reg l = swizzle_dst; 1456 l.reg_offset += i; 1457 1458 if (swiz == SWIZZLE_ZERO) { 1459 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f))); 1460 } else if (swiz == SWIZZLE_ONE) { 1461 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f))); 1462 } else { 1463 fs_reg r = dst; 1464 r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1465 emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1466 } 1467 } 1468 this->result = swizzle_dst; 1469 } 1470} 1471 1472void 1473fs_visitor::visit(ir_swizzle *ir) 1474{ 1475 ir->val->accept(this); 1476 fs_reg val = this->result; 1477 1478 if (ir->type->vector_elements == 1) { 1479 this->result.reg_offset += ir->mask.x; 1480 return; 1481 } 1482 1483 fs_reg result = fs_reg(this, ir->type); 1484 this->result = result; 1485 1486 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1487 fs_reg channel = val; 1488 int swiz = 0; 1489 1490 switch (i) { 1491 case 0: 1492 swiz = ir->mask.x; 1493 break; 1494 case 1: 1495 swiz = ir->mask.y; 1496 break; 1497 case 2: 1498 swiz = ir->mask.z; 1499 break; 1500 case 3: 1501 swiz = ir->mask.w; 1502 break; 1503 } 1504 1505 channel.reg_offset += swiz; 1506 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 1507 result.reg_offset++; 1508 } 1509} 1510 1511void 1512fs_visitor::visit(ir_discard *ir) 1513{ 
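   /* Discard is lowered in two steps: FS_OPCODE_DISCARD_NOT presumably
    * computes the complement of the live-pixel mask into the temporary, and
    * FS_OPCODE_DISCARD_AND folds that back into the execution mask;
    * kill_emitted is then set so emit_fb_writes() keeps the message header.
    * Only the unconditional form is handled (ir->condition must be NULL).
    */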
1514 fs_reg temp = fs_reg(this, glsl_type::uint_type); 1515 1516 assert(ir->condition == NULL); /* FINISHME */ 1517 1518 emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null_d)); 1519 emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null_d, temp)); 1520 kill_emitted = true; 1521} 1522 1523void 1524fs_visitor::visit(ir_constant *ir) 1525{ 1526 /* Set this->result to reg at the bottom of the function because some code 1527 * paths will cause this visitor to be applied to other fields. This will 1528 * cause the value stored in this->result to be modified. 1529 * 1530 * Make reg constant so that it doesn't get accidentally modified along the 1531 * way. Yes, I actually had this problem. :( 1532 */ 1533 const fs_reg reg(this, ir->type); 1534 fs_reg dst_reg = reg; 1535 1536 if (ir->type->is_array()) { 1537 const unsigned size = type_size(ir->type->fields.array); 1538 1539 for (unsigned i = 0; i < ir->type->length; i++) { 1540 ir->array_elements[i]->accept(this); 1541 fs_reg src_reg = this->result; 1542 1543 dst_reg.type = src_reg.type; 1544 for (unsigned j = 0; j < size; j++) { 1545 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); 1546 src_reg.reg_offset++; 1547 dst_reg.reg_offset++; 1548 } 1549 } 1550 } else if (ir->type->is_record()) { 1551 foreach_list(node, &ir->components) { 1552 ir_instruction *const field = (ir_instruction *) node; 1553 const unsigned size = type_size(field->type); 1554 1555 field->accept(this); 1556 fs_reg src_reg = this->result; 1557 1558 dst_reg.type = src_reg.type; 1559 for (unsigned j = 0; j < size; j++) { 1560 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); 1561 src_reg.reg_offset++; 1562 dst_reg.reg_offset++; 1563 } 1564 } 1565 } else { 1566 const unsigned size = type_size(ir->type); 1567 1568 for (unsigned i = 0; i < size; i++) { 1569 switch (ir->type->base_type) { 1570 case GLSL_TYPE_FLOAT: 1571 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]))); 1572 break; 1573 case GLSL_TYPE_UINT: 1574 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]))); 1575 break; 1576 case GLSL_TYPE_INT: 1577 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]))); 1578 break; 1579 case GLSL_TYPE_BOOL: 1580 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]))); 1581 break; 1582 default: 1583 assert(!"Non-float/uint/int/bool constant"); 1584 } 1585 dst_reg.reg_offset++; 1586 } 1587 } 1588 1589 this->result = reg; 1590} 1591 1592void 1593fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 1594{ 1595 ir_expression *expr = ir->as_expression(); 1596 1597 if (expr) { 1598 fs_reg op[2]; 1599 fs_inst *inst; 1600 1601 assert(expr->get_num_operands() <= 2); 1602 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1603 assert(expr->operands[i]->type->is_scalar()); 1604 1605 expr->operands[i]->accept(this); 1606 op[i] = this->result; 1607 } 1608 1609 switch (expr->operation) { 1610 case ir_unop_logic_not: 1611 inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1))); 1612 inst->conditional_mod = BRW_CONDITIONAL_Z; 1613 break; 1614 1615 case ir_binop_logic_xor: 1616 inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null_d, op[0], op[1])); 1617 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1618 break; 1619 1620 case ir_binop_logic_or: 1621 inst = emit(fs_inst(BRW_OPCODE_OR, reg_null_d, op[0], op[1])); 1622 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1623 break; 1624 1625 case ir_binop_logic_and: 1626 inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], op[1])); 1627 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1628 break; 1629 1630 case ir_unop_f2b: 1631 if 
(intel->gen >= 6) { 1632 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, 1633 op[0], fs_reg(0.0f))); 1634 } else { 1635 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0])); 1636 } 1637 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1638 break; 1639 1640 case ir_unop_i2b: 1641 if (intel->gen >= 6) { 1642 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0))); 1643 } else { 1644 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); 1645 } 1646 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1647 break; 1648 1649 case ir_binop_greater: 1650 case ir_binop_gequal: 1651 case ir_binop_less: 1652 case ir_binop_lequal: 1653 case ir_binop_equal: 1654 case ir_binop_all_equal: 1655 case ir_binop_nequal: 1656 case ir_binop_any_nequal: 1657 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1])); 1658 inst->conditional_mod = 1659 brw_conditional_for_comparison(expr->operation); 1660 break; 1661 1662 default: 1663 assert(!"not reached"); 1664 this->fail = true; 1665 break; 1666 } 1667 return; 1668 } 1669 1670 ir->accept(this); 1671 1672 if (intel->gen >= 6) { 1673 fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, 1674 this->result, fs_reg(1))); 1675 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1676 } else { 1677 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, this->result)); 1678 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1679 } 1680} 1681 1682/** 1683 * Emit a gen6 IF statement with the comparison folded into the IF 1684 * instruction. 1685 */ 1686void 1687fs_visitor::emit_if_gen6(ir_if *ir) 1688{ 1689 ir_expression *expr = ir->condition->as_expression(); 1690 1691 if (expr) { 1692 fs_reg op[2]; 1693 fs_inst *inst; 1694 fs_reg temp; 1695 1696 assert(expr->get_num_operands() <= 2); 1697 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1698 assert(expr->operands[i]->type->is_scalar()); 1699 1700 expr->operands[i]->accept(this); 1701 op[i] = this->result; 1702 } 1703 1704 switch (expr->operation) { 1705 case ir_unop_logic_not: 1706 inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0))); 1707 inst->conditional_mod = BRW_CONDITIONAL_Z; 1708 return; 1709 1710 case ir_binop_logic_xor: 1711 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1712 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1713 return; 1714 1715 case ir_binop_logic_or: 1716 temp = fs_reg(this, glsl_type::bool_type); 1717 emit(fs_inst(BRW_OPCODE_OR, temp, op[0], op[1])); 1718 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); 1719 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1720 return; 1721 1722 case ir_binop_logic_and: 1723 temp = fs_reg(this, glsl_type::bool_type); 1724 emit(fs_inst(BRW_OPCODE_AND, temp, op[0], op[1])); 1725 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); 1726 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1727 return; 1728 1729 case ir_unop_f2b: 1730 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0))); 1731 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1732 return; 1733 1734 case ir_unop_i2b: 1735 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); 1736 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1737 return; 1738 1739 case ir_binop_greater: 1740 case ir_binop_gequal: 1741 case ir_binop_less: 1742 case ir_binop_lequal: 1743 case ir_binop_equal: 1744 case ir_binop_all_equal: 1745 case ir_binop_nequal: 1746 case ir_binop_any_nequal: 1747 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1748 inst->conditional_mod = 1749 brw_conditional_for_comparison(expr->operation); 1750 return; 1751 
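   /* Any boolean expression not matched above falls through to the default
    * case: it asserts, emits an IF that simply tests op[0] against zero, and
    * sets this->fail so the compile is flagged as failed.
    */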
default: 1752 assert(!"not reached"); 1753 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); 1754 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1755 this->fail = true; 1756 return; 1757 } 1758 return; 1759 } 1760 1761 ir->condition->accept(this); 1762 1763 fs_inst *inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0))); 1764 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1765} 1766 1767void 1768fs_visitor::visit(ir_if *ir) 1769{ 1770 fs_inst *inst; 1771 1772 /* Don't point the annotation at the if statement, because then it plus 1773 * the then and else blocks get printed. 1774 */ 1775 this->base_ir = ir->condition; 1776 1777 if (intel->gen >= 6) { 1778 emit_if_gen6(ir); 1779 } else { 1780 emit_bool_to_cond_code(ir->condition); 1781 1782 inst = emit(fs_inst(BRW_OPCODE_IF)); 1783 inst->predicated = true; 1784 } 1785 1786 foreach_iter(exec_list_iterator, iter, ir->then_instructions) { 1787 ir_instruction *ir = (ir_instruction *)iter.get(); 1788 this->base_ir = ir; 1789 1790 ir->accept(this); 1791 } 1792 1793 if (!ir->else_instructions.is_empty()) { 1794 emit(fs_inst(BRW_OPCODE_ELSE)); 1795 1796 foreach_iter(exec_list_iterator, iter, ir->else_instructions) { 1797 ir_instruction *ir = (ir_instruction *)iter.get(); 1798 this->base_ir = ir; 1799 1800 ir->accept(this); 1801 } 1802 } 1803 1804 emit(fs_inst(BRW_OPCODE_ENDIF)); 1805} 1806 1807void 1808fs_visitor::visit(ir_loop *ir) 1809{ 1810 fs_reg counter = reg_undef; 1811 1812 if (ir->counter) { 1813 this->base_ir = ir->counter; 1814 ir->counter->accept(this); 1815 counter = *(variable_storage(ir->counter)); 1816 1817 if (ir->from) { 1818 this->base_ir = ir->from; 1819 ir->from->accept(this); 1820 1821 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result)); 1822 } 1823 } 1824 1825 emit(fs_inst(BRW_OPCODE_DO)); 1826 1827 if (ir->to) { 1828 this->base_ir = ir->to; 1829 ir->to->accept(this); 1830 1831 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, 1832 counter, this->result)); 1833 inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); 1834 1835 inst = emit(fs_inst(BRW_OPCODE_BREAK)); 1836 inst->predicated = true; 1837 } 1838 1839 foreach_iter(exec_list_iterator, iter, ir->body_instructions) { 1840 ir_instruction *ir = (ir_instruction *)iter.get(); 1841 1842 this->base_ir = ir; 1843 ir->accept(this); 1844 } 1845 1846 if (ir->increment) { 1847 this->base_ir = ir->increment; 1848 ir->increment->accept(this); 1849 emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result)); 1850 } 1851 1852 emit(fs_inst(BRW_OPCODE_WHILE)); 1853} 1854 1855void 1856fs_visitor::visit(ir_loop_jump *ir) 1857{ 1858 switch (ir->mode) { 1859 case ir_loop_jump::jump_break: 1860 emit(fs_inst(BRW_OPCODE_BREAK)); 1861 break; 1862 case ir_loop_jump::jump_continue: 1863 emit(fs_inst(BRW_OPCODE_CONTINUE)); 1864 break; 1865 } 1866} 1867 1868void 1869fs_visitor::visit(ir_call *ir) 1870{ 1871 assert(!"FINISHME"); 1872} 1873 1874void 1875fs_visitor::visit(ir_return *ir) 1876{ 1877 assert(!"FINISHME"); 1878} 1879 1880void 1881fs_visitor::visit(ir_function *ir) 1882{ 1883 /* Ignore function bodies other than main() -- we shouldn't see calls to 1884 * them since they should all be inlined before we get to ir_to_mesa. 
1885 */ 1886 if (strcmp(ir->name, "main") == 0) { 1887 const ir_function_signature *sig; 1888 exec_list empty; 1889 1890 sig = ir->matching_signature(&empty); 1891 1892 assert(sig); 1893 1894 foreach_iter(exec_list_iterator, iter, sig->body) { 1895 ir_instruction *ir = (ir_instruction *)iter.get(); 1896 this->base_ir = ir; 1897 1898 ir->accept(this); 1899 } 1900 } 1901} 1902 1903void 1904fs_visitor::visit(ir_function_signature *ir) 1905{ 1906 assert(!"not reached"); 1907 (void)ir; 1908} 1909 1910fs_inst * 1911fs_visitor::emit(fs_inst inst) 1912{ 1913 fs_inst *list_inst = new(mem_ctx) fs_inst; 1914 *list_inst = inst; 1915 1916 list_inst->annotation = this->current_annotation; 1917 list_inst->ir = this->base_ir; 1918 1919 this->instructions.push_tail(list_inst); 1920 1921 return list_inst; 1922} 1923 1924/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1925void 1926fs_visitor::emit_dummy_fs() 1927{ 1928 /* Everyone's favorite color. */ 1929 emit(fs_inst(BRW_OPCODE_MOV, 1930 fs_reg(MRF, 2), 1931 fs_reg(1.0f))); 1932 emit(fs_inst(BRW_OPCODE_MOV, 1933 fs_reg(MRF, 3), 1934 fs_reg(0.0f))); 1935 emit(fs_inst(BRW_OPCODE_MOV, 1936 fs_reg(MRF, 4), 1937 fs_reg(1.0f))); 1938 emit(fs_inst(BRW_OPCODE_MOV, 1939 fs_reg(MRF, 5), 1940 fs_reg(0.0f))); 1941 1942 fs_inst *write; 1943 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1944 fs_reg(0), 1945 fs_reg(0))); 1946 write->base_mrf = 0; 1947} 1948 1949/* The register location here is relative to the start of the URB 1950 * data. It will get adjusted to be a real location before 1951 * generate_code() time. 1952 */ 1953struct brw_reg 1954fs_visitor::interp_reg(int location, int channel) 1955{ 1956 int regnr = urb_setup[location] * 2 + channel / 2; 1957 int stride = (channel & 1) * 4; 1958 1959 assert(urb_setup[location] != -1); 1960 1961 return brw_vec1_grf(regnr, stride); 1962} 1963 1964/** Emits the interpolation for the varying inputs. */ 1965void 1966fs_visitor::emit_interpolation_setup_gen4() 1967{ 1968 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1969 1970 this->current_annotation = "compute pixel centers"; 1971 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1972 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1973 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1974 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1975 emit(fs_inst(BRW_OPCODE_ADD, 1976 this->pixel_x, 1977 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1978 fs_reg(brw_imm_v(0x10101010)))); 1979 emit(fs_inst(BRW_OPCODE_ADD, 1980 this->pixel_y, 1981 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1982 fs_reg(brw_imm_v(0x11001100)))); 1983 1984 this->current_annotation = "compute pixel deltas from v0"; 1985 if (brw->has_pln) { 1986 this->delta_x = fs_reg(this, glsl_type::vec2_type); 1987 this->delta_y = this->delta_x; 1988 this->delta_y.reg_offset++; 1989 } else { 1990 this->delta_x = fs_reg(this, glsl_type::float_type); 1991 this->delta_y = fs_reg(this, glsl_type::float_type); 1992 } 1993 emit(fs_inst(BRW_OPCODE_ADD, 1994 this->delta_x, 1995 this->pixel_x, 1996 fs_reg(negate(brw_vec1_grf(1, 0))))); 1997 emit(fs_inst(BRW_OPCODE_ADD, 1998 this->delta_y, 1999 this->pixel_y, 2000 fs_reg(negate(brw_vec1_grf(1, 1))))); 2001 2002 this->current_annotation = "compute pos.w and 1/pos.w"; 2003 /* Compute wpos.w. It's always in our setup, since it's needed to 2004 * interpolate the other attributes. 
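    * Channel 3 of FRAG_ATTRIB_WPOS is linterp'd into wpos_w, and
    * FS_OPCODE_RCP then produces the per-pixel 1/W (pixel_w) that
    * emit_general_interpolation() uses for perspective correction on
    * pre-gen6 hardware.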
2005 */ 2006 this->wpos_w = fs_reg(this, glsl_type::float_type); 2007 emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, 2008 interp_reg(FRAG_ATTRIB_WPOS, 3))); 2009 /* Compute the pixel 1/W value from wpos.w. */ 2010 this->pixel_w = fs_reg(this, glsl_type::float_type); 2011 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 2012 this->current_annotation = NULL; 2013} 2014 2015/** Emits the interpolation for the varying inputs. */ 2016void 2017fs_visitor::emit_interpolation_setup_gen6() 2018{ 2019 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 2020 2021 /* If the pixel centers end up used, the setup is the same as for gen4. */ 2022 this->current_annotation = "compute pixel centers"; 2023 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); 2024 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); 2025 int_pixel_x.type = BRW_REGISTER_TYPE_UW; 2026 int_pixel_y.type = BRW_REGISTER_TYPE_UW; 2027 emit(fs_inst(BRW_OPCODE_ADD, 2028 int_pixel_x, 2029 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 2030 fs_reg(brw_imm_v(0x10101010)))); 2031 emit(fs_inst(BRW_OPCODE_ADD, 2032 int_pixel_y, 2033 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 2034 fs_reg(brw_imm_v(0x11001100)))); 2035 2036 /* As of gen6, we can no longer mix float and int sources. We have 2037 * to turn the integer pixel centers into floats for their actual 2038 * use. 2039 */ 2040 this->pixel_x = fs_reg(this, glsl_type::float_type); 2041 this->pixel_y = fs_reg(this, glsl_type::float_type); 2042 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x)); 2043 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); 2044 2045 this->current_annotation = "compute 1/pos.w"; 2046 this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); 2047 this->pixel_w = fs_reg(this, glsl_type::float_type); 2048 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 2049 2050 this->delta_x = fs_reg(brw_vec8_grf(2, 0)); 2051 this->delta_y = fs_reg(brw_vec8_grf(3, 0)); 2052 2053 this->current_annotation = NULL; 2054} 2055 2056void 2057fs_visitor::emit_fb_writes() 2058{ 2059 this->current_annotation = "FB write header"; 2060 GLboolean header_present = GL_TRUE; 2061 int nr = 0; 2062 2063 if (intel->gen >= 6 && 2064 !this->kill_emitted && 2065 c->key.nr_color_regions == 1) { 2066 header_present = false; 2067 } 2068 2069 if (header_present) { 2070 /* m0, m1 header */ 2071 nr += 2; 2072 } 2073 2074 if (c->aa_dest_stencil_reg) { 2075 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2076 fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); 2077 } 2078 2079 /* Reserve space for color. It'll be filled in per MRT below. */ 2080 int color_mrf = nr; 2081 nr += 4; 2082 2083 if (c->source_depth_to_render_target) { 2084 if (c->computes_depth) { 2085 /* Hand over gl_FragDepth. */ 2086 assert(this->frag_depth); 2087 fs_reg depth = *(variable_storage(this->frag_depth)); 2088 2089 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 2090 } else { 2091 /* Pass through the payload depth. 
*/ 2092 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2093 fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); 2094 } 2095 } 2096 2097 if (c->dest_depth_reg) { 2098 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2099 fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); 2100 } 2101 2102 fs_reg color = reg_undef; 2103 if (this->frag_color) 2104 color = *(variable_storage(this->frag_color)); 2105 else if (this->frag_data) { 2106 color = *(variable_storage(this->frag_data)); 2107 color.type = BRW_REGISTER_TYPE_F; 2108 } 2109 2110 for (int target = 0; target < c->key.nr_color_regions; target++) { 2111 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2112 "FB write target %d", 2113 target); 2114 if (this->frag_color || this->frag_data) { 2115 for (int i = 0; i < 4; i++) { 2116 emit(fs_inst(BRW_OPCODE_MOV, 2117 fs_reg(MRF, color_mrf + i), 2118 color)); 2119 color.reg_offset++; 2120 } 2121 } 2122 2123 if (this->frag_color) 2124 color.reg_offset -= 4; 2125 2126 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 2127 reg_undef, reg_undef)); 2128 inst->target = target; 2129 inst->base_mrf = 0; 2130 inst->mlen = nr; 2131 if (target == c->key.nr_color_regions - 1) 2132 inst->eot = true; 2133 inst->header_present = header_present; 2134 } 2135 2136 if (c->key.nr_color_regions == 0) { 2137 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 2138 reg_undef, reg_undef)); 2139 inst->base_mrf = 0; 2140 inst->mlen = nr; 2141 inst->eot = true; 2142 inst->header_present = header_present; 2143 } 2144 2145 this->current_annotation = NULL; 2146} 2147 2148void 2149fs_visitor::generate_fb_write(fs_inst *inst) 2150{ 2151 GLboolean eot = inst->eot; 2152 struct brw_reg implied_header; 2153 2154 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 2155 * move, here's g1. 2156 */ 2157 brw_push_insn_state(p); 2158 brw_set_mask_control(p, BRW_MASK_DISABLE); 2159 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 2160 2161 if (inst->header_present) { 2162 if (intel->gen >= 6) { 2163 brw_MOV(p, 2164 brw_message_reg(inst->base_mrf), 2165 brw_vec8_grf(0, 0)); 2166 2167 if (inst->target > 0) { 2168 /* Set the render target index for choosing BLEND_STATE. */ 2169 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), 2170 BRW_REGISTER_TYPE_UD), 2171 brw_imm_ud(inst->target)); 2172 } 2173 2174 /* Clear viewport index, render target array index. 
*/ 2175 brw_AND(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 0), 2176 BRW_REGISTER_TYPE_UD), 2177 retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), 2178 brw_imm_ud(0xf7ff)); 2179 2180 implied_header = brw_null_reg(); 2181 } else { 2182 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); 2183 } 2184 2185 brw_MOV(p, 2186 brw_message_reg(inst->base_mrf + 1), 2187 brw_vec8_grf(1, 0)); 2188 } else { 2189 implied_header = brw_null_reg(); 2190 } 2191 2192 brw_pop_insn_state(p); 2193 2194 brw_fb_WRITE(p, 2195 8, /* dispatch_width */ 2196 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 2197 inst->base_mrf, 2198 implied_header, 2199 inst->target, 2200 inst->mlen, 2201 0, 2202 eot, 2203 inst->header_present); 2204} 2205 2206void 2207fs_visitor::generate_linterp(fs_inst *inst, 2208 struct brw_reg dst, struct brw_reg *src) 2209{ 2210 struct brw_reg delta_x = src[0]; 2211 struct brw_reg delta_y = src[1]; 2212 struct brw_reg interp = src[2]; 2213 2214 if (brw->has_pln && 2215 delta_y.nr == delta_x.nr + 1 && 2216 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { 2217 brw_PLN(p, dst, interp, delta_x); 2218 } else { 2219 brw_LINE(p, brw_null_reg(), interp, delta_x); 2220 brw_MAC(p, dst, suboffset(interp, 1), delta_y); 2221 } 2222} 2223 2224void 2225fs_visitor::generate_math(fs_inst *inst, 2226 struct brw_reg dst, struct brw_reg *src) 2227{ 2228 int op; 2229 2230 switch (inst->opcode) { 2231 case FS_OPCODE_RCP: 2232 op = BRW_MATH_FUNCTION_INV; 2233 break; 2234 case FS_OPCODE_RSQ: 2235 op = BRW_MATH_FUNCTION_RSQ; 2236 break; 2237 case FS_OPCODE_SQRT: 2238 op = BRW_MATH_FUNCTION_SQRT; 2239 break; 2240 case FS_OPCODE_EXP2: 2241 op = BRW_MATH_FUNCTION_EXP; 2242 break; 2243 case FS_OPCODE_LOG2: 2244 op = BRW_MATH_FUNCTION_LOG; 2245 break; 2246 case FS_OPCODE_POW: 2247 op = BRW_MATH_FUNCTION_POW; 2248 break; 2249 case FS_OPCODE_SIN: 2250 op = BRW_MATH_FUNCTION_SIN; 2251 break; 2252 case FS_OPCODE_COS: 2253 op = BRW_MATH_FUNCTION_COS; 2254 break; 2255 default: 2256 assert(!"not reached: unknown math function"); 2257 op = 0; 2258 break; 2259 } 2260 2261 if (intel->gen >= 6) { 2262 assert(inst->mlen == 0); 2263 2264 if (inst->opcode == FS_OPCODE_POW) { 2265 brw_math2(p, dst, op, src[0], src[1]); 2266 } else { 2267 brw_math(p, dst, 2268 op, 2269 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 2270 BRW_MATH_SATURATE_NONE, 2271 0, src[0], 2272 BRW_MATH_DATA_VECTOR, 2273 BRW_MATH_PRECISION_FULL); 2274 } 2275 } else { 2276 assert(inst->mlen >= 1); 2277 2278 brw_math(p, dst, 2279 op, 2280 inst->saturate ? 
BRW_MATH_SATURATE_SATURATE : 2281 BRW_MATH_SATURATE_NONE, 2282 inst->base_mrf, src[0], 2283 BRW_MATH_DATA_VECTOR, 2284 BRW_MATH_PRECISION_FULL); 2285 } 2286} 2287 2288void 2289fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2290{ 2291 int msg_type = -1; 2292 int rlen = 4; 2293 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; 2294 2295 if (intel->gen >= 5) { 2296 switch (inst->opcode) { 2297 case FS_OPCODE_TEX: 2298 if (inst->shadow_compare) { 2299 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; 2300 } else { 2301 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; 2302 } 2303 break; 2304 case FS_OPCODE_TXB: 2305 if (inst->shadow_compare) { 2306 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; 2307 } else { 2308 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; 2309 } 2310 break; 2311 } 2312 } else { 2313 switch (inst->opcode) { 2314 case FS_OPCODE_TEX: 2315 /* Note that G45 and older determines shadow compare and dispatch width 2316 * from message length for most messages. 2317 */ 2318 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 2319 if (inst->shadow_compare) { 2320 assert(inst->mlen == 6); 2321 } else { 2322 assert(inst->mlen <= 4); 2323 } 2324 break; 2325 case FS_OPCODE_TXB: 2326 if (inst->shadow_compare) { 2327 assert(inst->mlen == 6); 2328 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 2329 } else { 2330 assert(inst->mlen == 9); 2331 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 2332 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; 2333 } 2334 break; 2335 } 2336 } 2337 assert(msg_type != -1); 2338 2339 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { 2340 rlen = 8; 2341 dst = vec16(dst); 2342 } 2343 2344 brw_SAMPLE(p, 2345 retype(dst, BRW_REGISTER_TYPE_UW), 2346 inst->base_mrf, 2347 src, 2348 SURF_INDEX_TEXTURE(inst->sampler), 2349 inst->sampler, 2350 WRITEMASK_XYZW, 2351 msg_type, 2352 rlen, 2353 inst->mlen, 2354 0, 2355 1, 2356 simd_mode); 2357} 2358 2359 2360/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 2361 * looking like: 2362 * 2363 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 2364 * 2365 * and we're trying to produce: 2366 * 2367 * DDX DDY 2368 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 2369 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 2370 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 2371 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 2372 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 2373 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 2374 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 2375 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 2376 * 2377 * and add another set of two more subspans if in 16-pixel dispatch mode. 2378 * 2379 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 2380 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 2381 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 2382 * between each other. 
We could probably do it like ddx and swizzle the right 2383 * order later, but bail for now and just produce 2384 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) 2385 */ 2386void 2387fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2388{ 2389 struct brw_reg src0 = brw_reg(src.file, src.nr, 1, 2390 BRW_REGISTER_TYPE_F, 2391 BRW_VERTICAL_STRIDE_2, 2392 BRW_WIDTH_2, 2393 BRW_HORIZONTAL_STRIDE_0, 2394 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2395 struct brw_reg src1 = brw_reg(src.file, src.nr, 0, 2396 BRW_REGISTER_TYPE_F, 2397 BRW_VERTICAL_STRIDE_2, 2398 BRW_WIDTH_2, 2399 BRW_HORIZONTAL_STRIDE_0, 2400 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2401 brw_ADD(p, dst, src0, negate(src1)); 2402} 2403 2404void 2405fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2406{ 2407 struct brw_reg src0 = brw_reg(src.file, src.nr, 0, 2408 BRW_REGISTER_TYPE_F, 2409 BRW_VERTICAL_STRIDE_4, 2410 BRW_WIDTH_4, 2411 BRW_HORIZONTAL_STRIDE_0, 2412 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2413 struct brw_reg src1 = brw_reg(src.file, src.nr, 2, 2414 BRW_REGISTER_TYPE_F, 2415 BRW_VERTICAL_STRIDE_4, 2416 BRW_WIDTH_4, 2417 BRW_HORIZONTAL_STRIDE_0, 2418 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2419 brw_ADD(p, dst, src0, negate(src1)); 2420} 2421 2422void 2423fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask) 2424{ 2425 if (intel->gen >= 6) { 2426 /* Gen6 no longer has the mask reg for us to just read the 2427 * active channels from. However, cmp updates just the channels 2428 * of the flag reg that are enabled, so we can get at the 2429 * channel enables that way. In this step, make a reg of ones 2430 * we'll compare to. 2431 */ 2432 brw_MOV(p, mask, brw_imm_ud(1)); 2433 } else { 2434 brw_push_insn_state(p); 2435 brw_set_mask_control(p, BRW_MASK_DISABLE); 2436 brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */ 2437 brw_pop_insn_state(p); 2438 } 2439} 2440 2441void 2442fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask) 2443{ 2444 if (intel->gen >= 6) { 2445 struct brw_reg f0 = brw_flag_reg(); 2446 struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); 2447 2448 brw_push_insn_state(p); 2449 brw_set_mask_control(p, BRW_MASK_DISABLE); 2450 brw_MOV(p, f0, brw_imm_uw(0xffff)); /* inactive channels undiscarded */ 2451 brw_pop_insn_state(p); 2452 2453 brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 2454 BRW_CONDITIONAL_Z, mask, brw_imm_ud(0)); /* active channels fail test */ 2455 /* Undo CMP's whacking of predication*/ 2456 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2457 2458 brw_push_insn_state(p); 2459 brw_set_mask_control(p, BRW_MASK_DISABLE); 2460 brw_AND(p, g1, f0, g1); 2461 brw_pop_insn_state(p); 2462 } else { 2463 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); 2464 2465 mask = brw_uw1_reg(mask.file, mask.nr, 0); 2466 2467 brw_push_insn_state(p); 2468 brw_set_mask_control(p, BRW_MASK_DISABLE); 2469 brw_AND(p, g0, mask, g0); 2470 brw_pop_insn_state(p); 2471 } 2472} 2473 2474void 2475fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src) 2476{ 2477 assert(inst->mlen != 0); 2478 2479 brw_MOV(p, 2480 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD), 2481 retype(src, BRW_REGISTER_TYPE_UD)); 2482 brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1, 2483 inst->offset); 2484} 2485 2486void 2487fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst) 2488{ 2489 assert(inst->mlen != 0); 2490 2491 /* Clear any post destination dependencies that would be ignored by 2492 * the 
block read. See the B-Spec for pre-gen5 send instruction. 2493 * 2494 * This could use a better solution, since texture sampling and 2495 * math reads could potentially run into it as well -- anywhere 2496 * that we have a SEND with a destination that is a register that 2497 * was written but not read within the last N instructions (what's 2498 * N? unsure). This is rare because of dead code elimination, but 2499 * not impossible. 2500 */ 2501 if (intel->gen == 4 && !intel->is_g4x) 2502 brw_MOV(p, brw_null_reg(), dst); 2503 2504 brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1, 2505 inst->offset); 2506 2507 if (intel->gen == 4 && !intel->is_g4x) { 2508 /* gen4 errata: destination from a send can't be used as a 2509 * destination until it's been read. Just read it so we don't 2510 * have to worry. 2511 */ 2512 brw_MOV(p, brw_null_reg(), dst); 2513 } 2514} 2515 2516 2517void 2518fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) 2519{ 2520 assert(inst->mlen != 0); 2521 2522 /* Clear any post destination dependencies that would be ignored by 2523 * the block read. See the B-Spec for pre-gen5 send instruction. 2524 * 2525 * This could use a better solution, since texture sampling and 2526 * math reads could potentially run into it as well -- anywhere 2527 * that we have a SEND with a destination that is a register that 2528 * was written but not read within the last N instructions (what's 2529 * N? unsure). This is rare because of dead code elimination, but 2530 * not impossible. 2531 */ 2532 if (intel->gen == 4 && !intel->is_g4x) 2533 brw_MOV(p, brw_null_reg(), dst); 2534 2535 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), 2536 inst->offset, SURF_INDEX_FRAG_CONST_BUFFER); 2537 2538 if (intel->gen == 4 && !intel->is_g4x) { 2539 /* gen4 errata: destination from a send can't be used as a 2540 * destination until it's been read. Just read it so we don't 2541 * have to worry. 2542 */ 2543 brw_MOV(p, brw_null_reg(), dst); 2544 } 2545} 2546 2547/** 2548 * To be called after the last _mesa_add_state_reference() call, to 2549 * set up prog_data.param[] for assign_curb_setup() and 2550 * setup_pull_constants(). 2551 */ 2552void 2553fs_visitor::setup_paramvalues_refs() 2554{ 2555 /* Set up the pointers to ParamValues now that that array is finalized. */ 2556 for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { 2557 c->prog_data.param[i] = 2558 fp->Base.Parameters->ParameterValues[this->param_index[i]] + 2559 this->param_offset[i]; 2560 } 2561} 2562 2563void 2564fs_visitor::assign_curb_setup() 2565{ 2566 c->prog_data.first_curbe_grf = c->nr_payload_regs; 2567 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 2568 2569 /* Map the offsets in the UNIFORM file to fixed HW regs. 
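 * As a worked example (the value 2 below is only an assumed first_curbe_grf, not one
 * computed here): UNIFORM constant number 11 would land at g3.3, since
 * brw_vec1_grf(2 + 11 / 8, 11 % 8) selects GRF 3, element 3.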
*/ 2570 foreach_iter(exec_list_iterator, iter, this->instructions) { 2571 fs_inst *inst = (fs_inst *)iter.get(); 2572 2573 for (unsigned int i = 0; i < 3; i++) { 2574 if (inst->src[i].file == UNIFORM) { 2575 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 2576 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + 2577 constant_nr / 8, 2578 constant_nr % 8); 2579 2580 inst->src[i].file = FIXED_HW_REG; 2581 inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type); 2582 } 2583 } 2584 } 2585} 2586 2587void 2588fs_visitor::calculate_urb_setup() 2589{ 2590 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2591 urb_setup[i] = -1; 2592 } 2593 2594 int urb_next = 0; 2595 /* Figure out where each of the incoming setup attributes lands. */ 2596 if (intel->gen >= 6) { 2597 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2598 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) { 2599 urb_setup[i] = urb_next++; 2600 } 2601 } 2602 } else { 2603 /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */ 2604 for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) { 2605 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { 2606 int fp_index; 2607 2608 if (i >= VERT_RESULT_VAR0) 2609 fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); 2610 else if (i <= VERT_RESULT_TEX7) 2611 fp_index = i; 2612 else 2613 fp_index = -1; 2614 2615 if (fp_index >= 0) 2616 urb_setup[fp_index] = urb_next++; 2617 } 2618 } 2619 } 2620 2621 /* Each attribute is 4 setup channels, each of which is half a reg. */ 2622 c->prog_data.urb_read_length = urb_next * 2; 2623} 2624 2625void 2626fs_visitor::assign_urb_setup() 2627{ 2628 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; 2629 2630 /* Offset all the urb_setup[] index by the actual position of the 2631 * setup regs, now that the location of the constants has been chosen. 2632 */ 2633 foreach_iter(exec_list_iterator, iter, this->instructions) { 2634 fs_inst *inst = (fs_inst *)iter.get(); 2635 2636 if (inst->opcode == FS_OPCODE_LINTERP) { 2637 assert(inst->src[2].file == FIXED_HW_REG); 2638 inst->src[2].fixed_hw_reg.nr += urb_start; 2639 } 2640 2641 if (inst->opcode == FS_OPCODE_CINTERP) { 2642 assert(inst->src[0].file == FIXED_HW_REG); 2643 inst->src[0].fixed_hw_reg.nr += urb_start; 2644 } 2645 } 2646 2647 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 2648} 2649 2650/** 2651 * Split large virtual GRFs into separate components if we can. 2652 * 2653 * This is mostly duplicated with what brw_fs_vector_splitting does, 2654 * but that's really conservative because it's afraid of doing 2655 * splitting that doesn't result in real progress after the rest of 2656 * the optimization phases, which would cause infinite looping in 2657 * optimization. We can do it once here, safely. This also has the 2658 * opportunity to split interpolated values, or maybe even uniforms, 2659 * which we don't have at the IR level. 2660 * 2661 * We want to split, because virtual GRFs are what we register 2662 * allocate and spill (due to contiguousness requirements for some 2663 * instructions), and they're what we naturally generate in the 2664 * codegen process, but most virtual GRFs don't actually need to be 2665 * contiguous sets of GRFs. If we split, we'll end up with reduced 2666 * live intervals and better dead code elimination and coalescing. 
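 * For example, a size-4 virtual GRF holding a vec4 temporary can normally be split into
 * four independent size-1 GRFs; the exceptions handled below are texture results (the
 * sampler writes four contiguous registers) and, when PLN is used, the delta_x/delta_y
 * pair, which must stay adjacent.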
2667 */ 2668void 2669fs_visitor::split_virtual_grfs() 2670{ 2671 int num_vars = this->virtual_grf_next; 2672 bool split_grf[num_vars]; 2673 int new_virtual_grf[num_vars]; 2674 2675 /* Try to split anything > 0 sized. */ 2676 for (int i = 0; i < num_vars; i++) { 2677 if (this->virtual_grf_sizes[i] != 1) 2678 split_grf[i] = true; 2679 else 2680 split_grf[i] = false; 2681 } 2682 2683 if (brw->has_pln) { 2684 /* PLN opcodes rely on the delta_xy being contiguous. */ 2685 split_grf[this->delta_x.reg] = false; 2686 } 2687 2688 foreach_iter(exec_list_iterator, iter, this->instructions) { 2689 fs_inst *inst = (fs_inst *)iter.get(); 2690 2691 /* Texturing produces 4 contiguous registers, so no splitting. */ 2692 if (inst->is_tex()) { 2693 split_grf[inst->dst.reg] = false; 2694 } 2695 } 2696 2697 /* Allocate new space for split regs. Note that the virtual 2698 * numbers will be contiguous. 2699 */ 2700 for (int i = 0; i < num_vars; i++) { 2701 if (split_grf[i]) { 2702 new_virtual_grf[i] = virtual_grf_alloc(1); 2703 for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { 2704 int reg = virtual_grf_alloc(1); 2705 assert(reg == new_virtual_grf[i] + j - 1); 2706 (void) reg; 2707 } 2708 this->virtual_grf_sizes[i] = 1; 2709 } 2710 } 2711 2712 foreach_iter(exec_list_iterator, iter, this->instructions) { 2713 fs_inst *inst = (fs_inst *)iter.get(); 2714 2715 if (inst->dst.file == GRF && 2716 split_grf[inst->dst.reg] && 2717 inst->dst.reg_offset != 0) { 2718 inst->dst.reg = (new_virtual_grf[inst->dst.reg] + 2719 inst->dst.reg_offset - 1); 2720 inst->dst.reg_offset = 0; 2721 } 2722 for (int i = 0; i < 3; i++) { 2723 if (inst->src[i].file == GRF && 2724 split_grf[inst->src[i].reg] && 2725 inst->src[i].reg_offset != 0) { 2726 inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] + 2727 inst->src[i].reg_offset - 1); 2728 inst->src[i].reg_offset = 0; 2729 } 2730 } 2731 } 2732 this->live_intervals_valid = false; 2733} 2734 2735/** 2736 * Choose accesses from the UNIFORM file to demote to using the pull 2737 * constant buffer. 2738 * 2739 * We allow a fragment shader to have more than the specified minimum 2740 * maximum number of fragment shader uniform components (64). If 2741 * there are too many of these, they'd fill up all of register space. 2742 * So, this will push some of them out to the pull constant buffer and 2743 * update the program to load them. 2744 */ 2745void 2746fs_visitor::setup_pull_constants() 2747{ 2748 /* Only allow 16 registers (128 uniform components) as push constants. */ 2749 unsigned int max_uniform_components = 16 * 8; 2750 if (c->prog_data.nr_params <= max_uniform_components) 2751 return; 2752 2753 /* Just demote the end of the list. We could probably do better 2754 * here, demoting things that are rarely used in the program first. 
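 * As an illustration (200 is just an assumed uniform count): a shader with 200 uniform
 * float components would keep components 0..127 as push constants and demote components
 * 128..199; each demoted use gets an FS_OPCODE_PULL_CONSTANT_LOAD of the containing
 * oword, with .smear selecting the right float from it.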
2755 */ 2756 int pull_uniform_base = max_uniform_components; 2757 int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; 2758 2759 foreach_iter(exec_list_iterator, iter, this->instructions) { 2760 fs_inst *inst = (fs_inst *)iter.get(); 2761 2762 for (int i = 0; i < 3; i++) { 2763 if (inst->src[i].file != UNIFORM) 2764 continue; 2765 2766 int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 2767 if (uniform_nr < pull_uniform_base) 2768 continue; 2769 2770 fs_reg dst = fs_reg(this, glsl_type::float_type); 2771 fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD, 2772 dst); 2773 pull->offset = ((uniform_nr - pull_uniform_base) * 4) & ~15; 2774 pull->ir = inst->ir; 2775 pull->annotation = inst->annotation; 2776 pull->base_mrf = 14; 2777 pull->mlen = 1; 2778 2779 inst->insert_before(pull); 2780 2781 inst->src[i].file = GRF; 2782 inst->src[i].reg = dst.reg; 2783 inst->src[i].reg_offset = 0; 2784 inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3; 2785 } 2786 } 2787 2788 for (int i = 0; i < pull_uniform_count; i++) { 2789 c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i]; 2790 c->prog_data.pull_param_convert[i] = 2791 c->prog_data.param_convert[pull_uniform_base + i]; 2792 } 2793 c->prog_data.nr_params -= pull_uniform_count; 2794 c->prog_data.nr_pull_params = pull_uniform_count; 2795} 2796 2797void 2798fs_visitor::calculate_live_intervals() 2799{ 2800 int num_vars = this->virtual_grf_next; 2801 int *def = ralloc_array(mem_ctx, int, num_vars); 2802 int *use = ralloc_array(mem_ctx, int, num_vars); 2803 int loop_depth = 0; 2804 int loop_start = 0; 2805 int bb_header_ip = 0; 2806 2807 if (this->live_intervals_valid) 2808 return; 2809 2810 for (int i = 0; i < num_vars; i++) { 2811 def[i] = MAX_INSTRUCTION; 2812 use[i] = -1; 2813 } 2814 2815 int ip = 0; 2816 foreach_iter(exec_list_iterator, iter, this->instructions) { 2817 fs_inst *inst = (fs_inst *)iter.get(); 2818 2819 if (inst->opcode == BRW_OPCODE_DO) { 2820 if (loop_depth++ == 0) 2821 loop_start = ip; 2822 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2823 loop_depth--; 2824 2825 if (loop_depth == 0) { 2826 /* Patches up the use of vars marked for being live across 2827 * the whole loop. 2828 */ 2829 for (int i = 0; i < num_vars; i++) { 2830 if (use[i] == loop_start) { 2831 use[i] = ip; 2832 } 2833 } 2834 } 2835 } else { 2836 for (unsigned int i = 0; i < 3; i++) { 2837 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2838 int reg = inst->src[i].reg; 2839 2840 if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && 2841 def[reg] >= bb_header_ip)) { 2842 use[reg] = ip; 2843 } else { 2844 def[reg] = MIN2(loop_start, def[reg]); 2845 use[reg] = loop_start; 2846 2847 /* Nobody else is going to go smash our start to 2848 * later in the loop now, because def[reg] now 2849 * points before the bb header. 2850 */ 2851 } 2852 } 2853 } 2854 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2855 int reg = inst->dst.reg; 2856 2857 if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && 2858 !inst->predicated)) { 2859 def[reg] = MIN2(def[reg], ip); 2860 } else { 2861 def[reg] = MIN2(def[reg], loop_start); 2862 } 2863 } 2864 } 2865 2866 ip++; 2867 2868 /* Set the basic block header IP. This is used for determining 2869 * if a complete def of single-register virtual GRF in a loop 2870 * dominates a use in the same basic block. It's a quick way to 2871 * reduce the live interval range of most register used in a 2872 * loop. 
2873 */ 2874 if (inst->opcode == BRW_OPCODE_IF || 2875 inst->opcode == BRW_OPCODE_ELSE || 2876 inst->opcode == BRW_OPCODE_ENDIF || 2877 inst->opcode == BRW_OPCODE_DO || 2878 inst->opcode == BRW_OPCODE_WHILE || 2879 inst->opcode == BRW_OPCODE_BREAK || 2880 inst->opcode == BRW_OPCODE_CONTINUE) { 2881 bb_header_ip = ip; 2882 } 2883 } 2884 2885 ralloc_free(this->virtual_grf_def); 2886 ralloc_free(this->virtual_grf_use); 2887 this->virtual_grf_def = def; 2888 this->virtual_grf_use = use; 2889 2890 this->live_intervals_valid = true; 2891} 2892 2893/** 2894 * Attempts to move immediate constants into the immediate 2895 * constant slot of following instructions. 2896 * 2897 * Immediate constants are a bit tricky -- they have to be in the last 2898 * operand slot, you can't do abs/negate on them, 2899 */ 2900 2901bool 2902fs_visitor::propagate_constants() 2903{ 2904 bool progress = false; 2905 2906 calculate_live_intervals(); 2907 2908 foreach_iter(exec_list_iterator, iter, this->instructions) { 2909 fs_inst *inst = (fs_inst *)iter.get(); 2910 2911 if (inst->opcode != BRW_OPCODE_MOV || 2912 inst->predicated || 2913 inst->dst.file != GRF || inst->src[0].file != IMM || 2914 inst->dst.type != inst->src[0].type) 2915 continue; 2916 2917 /* Don't bother with cases where we should have had the 2918 * operation on the constant folded in GLSL already. 2919 */ 2920 if (inst->saturate) 2921 continue; 2922 2923 /* Found a move of a constant to a GRF. Find anything else using the GRF 2924 * before it's written, and replace it with the constant if we can. 2925 */ 2926 exec_list_iterator scan_iter = iter; 2927 scan_iter.next(); 2928 for (; scan_iter.has_next(); scan_iter.next()) { 2929 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2930 2931 if (scan_inst->opcode == BRW_OPCODE_DO || 2932 scan_inst->opcode == BRW_OPCODE_WHILE || 2933 scan_inst->opcode == BRW_OPCODE_ELSE || 2934 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2935 break; 2936 } 2937 2938 for (int i = 2; i >= 0; i--) { 2939 if (scan_inst->src[i].file != GRF || 2940 scan_inst->src[i].reg != inst->dst.reg || 2941 scan_inst->src[i].reg_offset != inst->dst.reg_offset) 2942 continue; 2943 2944 /* Don't bother with cases where we should have had the 2945 * operation on the constant folded in GLSL already. 
2946 */ 2947 if (scan_inst->src[i].negate || scan_inst->src[i].abs) 2948 continue; 2949 2950 switch (scan_inst->opcode) { 2951 case BRW_OPCODE_MOV: 2952 scan_inst->src[i] = inst->src[0]; 2953 progress = true; 2954 break; 2955 2956 case BRW_OPCODE_MUL: 2957 case BRW_OPCODE_ADD: 2958 if (i == 1) { 2959 scan_inst->src[i] = inst->src[0]; 2960 progress = true; 2961 } else if (i == 0 && scan_inst->src[1].file != IMM) { 2962 /* Fit this constant in by commuting the operands */ 2963 scan_inst->src[0] = scan_inst->src[1]; 2964 scan_inst->src[1] = inst->src[0]; 2965 progress = true; 2966 } 2967 break; 2968 case BRW_OPCODE_CMP: 2969 case BRW_OPCODE_SEL: 2970 if (i == 1) { 2971 scan_inst->src[i] = inst->src[0]; 2972 progress = true; 2973 } 2974 } 2975 } 2976 2977 if (scan_inst->dst.file == GRF && 2978 scan_inst->dst.reg == inst->dst.reg && 2979 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2980 scan_inst->is_tex())) { 2981 break; 2982 } 2983 } 2984 } 2985 2986 if (progress) 2987 this->live_intervals_valid = false; 2988 2989 return progress; 2990} 2991/** 2992 * Must be called after calculate_live_intervals() to remove unused 2993 * writes to registers -- register allocation will fail otherwise 2994 * because something defined but not used won't be considered to 2995 * interfere with other regs. 2996 */ 2997bool 2998fs_visitor::dead_code_eliminate() 2999{ 3000 bool progress = false; 3001 int pc = 0; 3002 3003 calculate_live_intervals(); 3004 3005 foreach_iter(exec_list_iterator, iter, this->instructions) { 3006 fs_inst *inst = (fs_inst *)iter.get(); 3007 3008 if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { 3009 inst->remove(); 3010 progress = true; 3011 } 3012 3013 pc++; 3014 } 3015 3016 if (progress) 3017 live_intervals_valid = false; 3018 3019 return progress; 3020} 3021 3022bool 3023fs_visitor::register_coalesce() 3024{ 3025 bool progress = false; 3026 int if_depth = 0; 3027 int loop_depth = 0; 3028 3029 foreach_iter(exec_list_iterator, iter, this->instructions) { 3030 fs_inst *inst = (fs_inst *)iter.get(); 3031 3032 /* Make sure that we dominate the instructions we're going to 3033 * scan for interfering with our coalescing, or we won't have 3034 * scanned enough to see if anything interferes with our 3035 * coalescing. We don't dominate the following instructions if 3036 * we're in a loop or an if block. 3037 */ 3038 switch (inst->opcode) { 3039 case BRW_OPCODE_DO: 3040 loop_depth++; 3041 break; 3042 case BRW_OPCODE_WHILE: 3043 loop_depth--; 3044 break; 3045 case BRW_OPCODE_IF: 3046 if_depth++; 3047 break; 3048 case BRW_OPCODE_ENDIF: 3049 if_depth--; 3050 break; 3051 } 3052 if (loop_depth || if_depth) 3053 continue; 3054 3055 if (inst->opcode != BRW_OPCODE_MOV || 3056 inst->predicated || 3057 inst->saturate || 3058 inst->dst.file != GRF || inst->src[0].file != GRF || 3059 inst->dst.type != inst->src[0].type) 3060 continue; 3061 3062 bool has_source_modifiers = inst->src[0].abs || inst->src[0].negate; 3063 3064 /* Found a move of a GRF to a GRF. Let's see if we can coalesce 3065 * them: check for no writes to either one until the exit of the 3066 * program.
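 * More precisely, the scan below walks forward from this MOV: any later write to either
 * register, or (on gen6) a MATH instruction that couldn't take the propagated source
 * modifiers, aborts the coalesce for this MOV only and we move on to the next candidate.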
3067 */ 3068 bool interfered = false; 3069 exec_list_iterator scan_iter = iter; 3070 scan_iter.next(); 3071 for (; scan_iter.has_next(); scan_iter.next()) { 3072 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 3073 3074 if (scan_inst->dst.file == GRF) { 3075 if (scan_inst->dst.reg == inst->dst.reg && 3076 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 3077 scan_inst->is_tex())) { 3078 interfered = true; 3079 break; 3080 } 3081 if (scan_inst->dst.reg == inst->src[0].reg && 3082 (scan_inst->dst.reg_offset == inst->src[0].reg_offset || 3083 scan_inst->is_tex())) { 3084 interfered = true; 3085 break; 3086 } 3087 } 3088 3089 /* The gen6 MATH instruction can't handle source modifiers, so avoid 3090 * coalescing those for now. We should do something more specific. 3091 */ 3092 if (intel->gen == 6 && scan_inst->is_math() && has_source_modifiers) { 3093 interfered = true; 3094 break; 3095 } 3096 } 3097 if (interfered) { 3098 continue; 3099 } 3100 3101 /* Rewrite the later usage to point at the source of the move to 3102 * be removed. 3103 */ 3104 for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); 3105 scan_iter.next()) { 3106 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 3107 3108 for (int i = 0; i < 3; i++) { 3109 if (scan_inst->src[i].file == GRF && 3110 scan_inst->src[i].reg == inst->dst.reg && 3111 scan_inst->src[i].reg_offset == inst->dst.reg_offset) { 3112 scan_inst->src[i].reg = inst->src[0].reg; 3113 scan_inst->src[i].reg_offset = inst->src[0].reg_offset; 3114 scan_inst->src[i].abs |= inst->src[0].abs; 3115 scan_inst->src[i].negate ^= inst->src[0].negate; 3116 scan_inst->src[i].smear = inst->src[0].smear; 3117 } 3118 } 3119 } 3120 3121 inst->remove(); 3122 progress = true; 3123 } 3124 3125 if (progress) 3126 live_intervals_valid = false; 3127 3128 return progress; 3129} 3130 3131 3132bool 3133fs_visitor::compute_to_mrf() 3134{ 3135 bool progress = false; 3136 int next_ip = 0; 3137 3138 calculate_live_intervals(); 3139 3140 foreach_iter(exec_list_iterator, iter, this->instructions) { 3141 fs_inst *inst = (fs_inst *)iter.get(); 3142 3143 int ip = next_ip; 3144 next_ip++; 3145 3146 if (inst->opcode != BRW_OPCODE_MOV || 3147 inst->predicated || 3148 inst->dst.file != MRF || inst->src[0].file != GRF || 3149 inst->dst.type != inst->src[0].type || 3150 inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1) 3151 continue; 3152 3153 /* Can't compute-to-MRF this GRF if someone else was going to 3154 * read it later. 3155 */ 3156 if (this->virtual_grf_use[inst->src[0].reg] > ip) 3157 continue; 3158 3159 /* Found a move of a GRF to a MRF. Let's see if we can go 3160 * rewrite the thing that made this GRF to write into the MRF. 3161 */ 3162 fs_inst *scan_inst; 3163 for (scan_inst = (fs_inst *)inst->prev; 3164 scan_inst->prev != NULL; 3165 scan_inst = (fs_inst *)scan_inst->prev) { 3166 if (scan_inst->dst.file == GRF && 3167 scan_inst->dst.reg == inst->src[0].reg) { 3168 /* Found the last thing to write our reg we want to turn 3169 * into a compute-to-MRF. 3170 */ 3171 3172 if (scan_inst->is_tex()) { 3173 /* texturing writes several continuous regs, so we can't 3174 * compute-to-mrf that. 3175 */ 3176 break; 3177 } 3178 3179 /* If it's predicated, it (probably) didn't populate all 3180 * the channels. 3181 */ 3182 if (scan_inst->predicated) 3183 break; 3184 3185 /* SEND instructions can't have MRF as a destination. 
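 * (A nonzero mlen below is what marks scan_inst as a SEND, so its destination can't be
 * retargeted to the MRF.)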
*/ 3186 if (scan_inst->mlen) 3187 break; 3188 3189 if (intel->gen >= 6) { 3190 /* gen6 math instructions must have the destination be 3191 * GRF, so no compute-to-MRF for them. 3192 */ 3193 if (scan_inst->is_math()) { 3194 break; 3195 } 3196 } 3197 3198 if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { 3199 /* Found the creator of our MRF's source value. */ 3200 scan_inst->dst.file = MRF; 3201 scan_inst->dst.hw_reg = inst->dst.hw_reg; 3202 scan_inst->saturate |= inst->saturate; 3203 inst->remove(); 3204 progress = true; 3205 } 3206 break; 3207 } 3208 3209 /* We don't handle flow control here. Most computation of 3210 * values that end up in MRFs is shortly before the MRF 3211 * write anyway. 3212 */ 3213 if (scan_inst->opcode == BRW_OPCODE_DO || 3214 scan_inst->opcode == BRW_OPCODE_WHILE || 3215 scan_inst->opcode == BRW_OPCODE_ELSE || 3216 scan_inst->opcode == BRW_OPCODE_ENDIF) { 3217 break; 3218 } 3219 3220 /* You can't read from an MRF, so if someone else reads our 3221 * MRF's source GRF that we wanted to rewrite, that stops us. 3222 */ 3223 bool interfered = false; 3224 for (int i = 0; i < 3; i++) { 3225 if (scan_inst->src[i].file == GRF && 3226 scan_inst->src[i].reg == inst->src[0].reg && 3227 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) { 3228 interfered = true; 3229 } 3230 } 3231 if (interfered) 3232 break; 3233 3234 if (scan_inst->dst.file == MRF && 3235 scan_inst->dst.hw_reg == inst->dst.hw_reg) { 3236 /* Somebody else wrote our MRF here, so we can't 3237 * compute-to-MRF before that. 3238 */ 3239 break; 3240 } 3241 3242 if (scan_inst->mlen > 0) { 3243 /* Found a SEND instruction, which means that there are 3244 * live values in MRFs from base_mrf to base_mrf + 3245 * scan_inst->mlen - 1. Don't go pushing our MRF write up 3246 * above it. 3247 */ 3248 if (inst->dst.hw_reg >= scan_inst->base_mrf && 3249 inst->dst.hw_reg < scan_inst->base_mrf + scan_inst->mlen) { 3250 break; 3251 } 3252 } 3253 } 3254 } 3255 3256 return progress; 3257} 3258 3259/** 3260 * Walks through basic blocks, looking for repeated MRF writes and 3261 * removing the later ones. 3262 */ 3263bool 3264fs_visitor::remove_duplicate_mrf_writes() 3265{ 3266 fs_inst *last_mrf_move[16]; 3267 bool progress = false; 3268 3269 memset(last_mrf_move, 0, sizeof(last_mrf_move)); 3270 3271 foreach_iter(exec_list_iterator, iter, this->instructions) { 3272 fs_inst *inst = (fs_inst *)iter.get(); 3273 3274 switch (inst->opcode) { 3275 case BRW_OPCODE_DO: 3276 case BRW_OPCODE_WHILE: 3277 case BRW_OPCODE_IF: 3278 case BRW_OPCODE_ELSE: 3279 case BRW_OPCODE_ENDIF: 3280 memset(last_mrf_move, 0, sizeof(last_mrf_move)); 3281 continue; 3282 default: 3283 break; 3284 } 3285 3286 if (inst->opcode == BRW_OPCODE_MOV && 3287 inst->dst.file == MRF) { 3288 fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; 3289 if (prev_inst && inst->equals(prev_inst)) { 3290 inst->remove(); 3291 progress = true; 3292 continue; 3293 } 3294 } 3295 3296 /* Clear out the last-write records for MRFs that were overwritten. */ 3297 if (inst->dst.file == MRF) { 3298 last_mrf_move[inst->dst.hw_reg] = NULL; 3299 } 3300 3301 if (inst->mlen > 0) { 3302 /* Found a SEND instruction, which will include two or fewer 3303 * implied MRF writes. We could do better here. 3304 */ 3305 for (int i = 0; i < implied_mrf_writes(inst); i++) { 3306 last_mrf_move[inst->base_mrf + i] = NULL; 3307 } 3308 } 3309 3310 /* Clear out any MRF move records whose sources got overwritten.
*/ 3311 if (inst->dst.file == GRF) { 3312 for (unsigned int i = 0; i < Elements(last_mrf_move); i++) { 3313 if (last_mrf_move[i] && 3314 last_mrf_move[i]->src[0].reg == inst->dst.reg) { 3315 last_mrf_move[i] = NULL; 3316 } 3317 } 3318 } 3319 3320 if (inst->opcode == BRW_OPCODE_MOV && 3321 inst->dst.file == MRF && 3322 inst->src[0].file == GRF && 3323 !inst->predicated) { 3324 last_mrf_move[inst->dst.hw_reg] = inst; 3325 } 3326 } 3327 3328 return progress; 3329} 3330 3331bool 3332fs_visitor::virtual_grf_interferes(int a, int b) 3333{ 3334 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); 3335 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); 3336 3337 /* We can't handle dead register writes here, without iterating 3338 * over the whole instruction stream to find every single dead 3339 * write to that register to compare to the live interval of the 3340 * other register. Just assert that dead_code_eliminate() has been 3341 * called. 3342 */ 3343 assert((this->virtual_grf_use[a] != -1 || 3344 this->virtual_grf_def[a] == MAX_INSTRUCTION) && 3345 (this->virtual_grf_use[b] != -1 || 3346 this->virtual_grf_def[b] == MAX_INSTRUCTION)); 3347 3348 return start < end; 3349} 3350 3351static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) 3352{ 3353 struct brw_reg brw_reg; 3354 3355 switch (reg->file) { 3356 case GRF: 3357 case ARF: 3358 case MRF: 3359 if (reg->smear == -1) { 3360 brw_reg = brw_vec8_reg(reg->file, 3361 reg->hw_reg, 0); 3362 } else { 3363 brw_reg = brw_vec1_reg(reg->file, 3364 reg->hw_reg, reg->smear); 3365 } 3366 brw_reg = retype(brw_reg, reg->type); 3367 break; 3368 case IMM: 3369 switch (reg->type) { 3370 case BRW_REGISTER_TYPE_F: 3371 brw_reg = brw_imm_f(reg->imm.f); 3372 break; 3373 case BRW_REGISTER_TYPE_D: 3374 brw_reg = brw_imm_d(reg->imm.i); 3375 break; 3376 case BRW_REGISTER_TYPE_UD: 3377 brw_reg = brw_imm_ud(reg->imm.u); 3378 break; 3379 default: 3380 assert(!"not reached"); 3381 brw_reg = brw_null_reg(); 3382 break; 3383 } 3384 break; 3385 case FIXED_HW_REG: 3386 brw_reg = reg->fixed_hw_reg; 3387 break; 3388 case BAD_FILE: 3389 /* Probably unused. 
*/ 3390 brw_reg = brw_null_reg(); 3391 break; 3392 case UNIFORM: 3393 assert(!"not reached"); 3394 brw_reg = brw_null_reg(); 3395 break; 3396 default: 3397 assert(!"not reached"); 3398 brw_reg = brw_null_reg(); 3399 break; 3400 } 3401 if (reg->abs) 3402 brw_reg = brw_abs(brw_reg); 3403 if (reg->negate) 3404 brw_reg = negate(brw_reg); 3405 3406 return brw_reg; 3407} 3408 3409void 3410fs_visitor::generate_code() 3411{ 3412 int last_native_inst = 0; 3413 const char *last_annotation_string = NULL; 3414 ir_instruction *last_annotation_ir = NULL; 3415 3416 int if_stack_array_size = 16; 3417 int loop_stack_array_size = 16; 3418 int if_stack_depth = 0, loop_stack_depth = 0; 3419 brw_instruction **if_stack = 3420 rzalloc_array(this->mem_ctx, brw_instruction *, if_stack_array_size); 3421 brw_instruction **loop_stack = 3422 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); 3423 int *if_depth_in_loop = 3424 rzalloc_array(this->mem_ctx, int, loop_stack_array_size); 3425 3426 3427 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3428 printf("Native code for fragment shader %d:\n", 3429 ctx->Shader.CurrentFragmentProgram->Name); 3430 } 3431 3432 foreach_iter(exec_list_iterator, iter, this->instructions) { 3433 fs_inst *inst = (fs_inst *)iter.get(); 3434 struct brw_reg src[3], dst; 3435 3436 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3437 if (last_annotation_ir != inst->ir) { 3438 last_annotation_ir = inst->ir; 3439 if (last_annotation_ir) { 3440 printf(" "); 3441 last_annotation_ir->print(); 3442 printf("\n"); 3443 } 3444 } 3445 if (last_annotation_string != inst->annotation) { 3446 last_annotation_string = inst->annotation; 3447 if (last_annotation_string) 3448 printf(" %s\n", last_annotation_string); 3449 } 3450 } 3451 3452 for (unsigned int i = 0; i < 3; i++) { 3453 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 3454 } 3455 dst = brw_reg_from_fs_reg(&inst->dst); 3456 3457 brw_set_conditionalmod(p, inst->conditional_mod); 3458 brw_set_predicate_control(p, inst->predicated); 3459 brw_set_saturate(p, inst->saturate); 3460 3461 switch (inst->opcode) { 3462 case BRW_OPCODE_MOV: 3463 brw_MOV(p, dst, src[0]); 3464 break; 3465 case BRW_OPCODE_ADD: 3466 brw_ADD(p, dst, src[0], src[1]); 3467 break; 3468 case BRW_OPCODE_MUL: 3469 brw_MUL(p, dst, src[0], src[1]); 3470 break; 3471 3472 case BRW_OPCODE_FRC: 3473 brw_FRC(p, dst, src[0]); 3474 break; 3475 case BRW_OPCODE_RNDD: 3476 brw_RNDD(p, dst, src[0]); 3477 break; 3478 case BRW_OPCODE_RNDE: 3479 brw_RNDE(p, dst, src[0]); 3480 break; 3481 case BRW_OPCODE_RNDZ: 3482 brw_RNDZ(p, dst, src[0]); 3483 break; 3484 3485 case BRW_OPCODE_AND: 3486 brw_AND(p, dst, src[0], src[1]); 3487 break; 3488 case BRW_OPCODE_OR: 3489 brw_OR(p, dst, src[0], src[1]); 3490 break; 3491 case BRW_OPCODE_XOR: 3492 brw_XOR(p, dst, src[0], src[1]); 3493 break; 3494 case BRW_OPCODE_NOT: 3495 brw_NOT(p, dst, src[0]); 3496 break; 3497 case BRW_OPCODE_ASR: 3498 brw_ASR(p, dst, src[0], src[1]); 3499 break; 3500 case BRW_OPCODE_SHR: 3501 brw_SHR(p, dst, src[0], src[1]); 3502 break; 3503 case BRW_OPCODE_SHL: 3504 brw_SHL(p, dst, src[0], src[1]); 3505 break; 3506 3507 case BRW_OPCODE_CMP: 3508 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 3509 break; 3510 case BRW_OPCODE_SEL: 3511 brw_SEL(p, dst, src[0], src[1]); 3512 break; 3513 3514 case BRW_OPCODE_IF: 3515 if (inst->src[0].file != BAD_FILE) { 3516 assert(intel->gen >= 6); 3517 if_stack[if_stack_depth] = brw_IF_gen6(p, inst->conditional_mod, src[0], src[1]); 3518 } else { 3519 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 
3520 } 3521 if_depth_in_loop[loop_stack_depth]++; 3522 if_stack_depth++; 3523 if (if_stack_array_size <= if_stack_depth) { 3524 if_stack_array_size *= 2; 3525 if_stack = reralloc(this->mem_ctx, if_stack, brw_instruction *, 3526 if_stack_array_size); 3527 } 3528 break; 3529 3530 case BRW_OPCODE_ELSE: 3531 if_stack[if_stack_depth - 1] = 3532 brw_ELSE(p, if_stack[if_stack_depth - 1]); 3533 break; 3534 case BRW_OPCODE_ENDIF: 3535 if_stack_depth--; 3536 brw_ENDIF(p , if_stack[if_stack_depth]); 3537 if_depth_in_loop[loop_stack_depth]--; 3538 break; 3539 3540 case BRW_OPCODE_DO: 3541 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 3542 if (loop_stack_array_size <= loop_stack_depth) { 3543 loop_stack_array_size *= 2; 3544 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, 3545 loop_stack_array_size); 3546 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, 3547 loop_stack_array_size); 3548 } 3549 if_depth_in_loop[loop_stack_depth] = 0; 3550 break; 3551 3552 case BRW_OPCODE_BREAK: 3553 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 3554 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 3555 break; 3556 case BRW_OPCODE_CONTINUE: 3557 /* FINISHME: We need to write the loop instruction support still. */ 3558 if (intel->gen >= 6) 3559 brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]); 3560 else 3561 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 3562 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 3563 break; 3564 3565 case BRW_OPCODE_WHILE: { 3566 struct brw_instruction *inst0, *inst1; 3567 GLuint br = 1; 3568 3569 if (intel->gen >= 5) 3570 br = 2; 3571 3572 assert(loop_stack_depth > 0); 3573 loop_stack_depth--; 3574 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 3575 if (intel->gen < 6) { 3576 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 3577 while (inst0 > loop_stack[loop_stack_depth]) { 3578 inst0--; 3579 if (inst0->header.opcode == BRW_OPCODE_BREAK && 3580 inst0->bits3.if_else.jump_count == 0) { 3581 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 3582 } 3583 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 3584 inst0->bits3.if_else.jump_count == 0) { 3585 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 3586 } 3587 } 3588 } 3589 } 3590 break; 3591 3592 case FS_OPCODE_RCP: 3593 case FS_OPCODE_RSQ: 3594 case FS_OPCODE_SQRT: 3595 case FS_OPCODE_EXP2: 3596 case FS_OPCODE_LOG2: 3597 case FS_OPCODE_POW: 3598 case FS_OPCODE_SIN: 3599 case FS_OPCODE_COS: 3600 generate_math(inst, dst, src); 3601 break; 3602 case FS_OPCODE_CINTERP: 3603 brw_MOV(p, dst, src[0]); 3604 break; 3605 case FS_OPCODE_LINTERP: 3606 generate_linterp(inst, dst, src); 3607 break; 3608 case FS_OPCODE_TEX: 3609 case FS_OPCODE_TXB: 3610 case FS_OPCODE_TXL: 3611 generate_tex(inst, dst, src[0]); 3612 break; 3613 case FS_OPCODE_DISCARD_NOT: 3614 generate_discard_not(inst, dst); 3615 break; 3616 case FS_OPCODE_DISCARD_AND: 3617 generate_discard_and(inst, src[0]); 3618 break; 3619 case FS_OPCODE_DDX: 3620 generate_ddx(inst, dst, src[0]); 3621 break; 3622 case FS_OPCODE_DDY: 3623 generate_ddy(inst, dst, src[0]); 3624 break; 3625 3626 case FS_OPCODE_SPILL: 3627 generate_spill(inst, src[0]); 3628 break; 3629 3630 case FS_OPCODE_UNSPILL: 3631 generate_unspill(inst, dst); 3632 break; 3633 3634 case FS_OPCODE_PULL_CONSTANT_LOAD: 3635 generate_pull_constant_load(inst, dst); 3636 break; 3637 3638 case FS_OPCODE_FB_WRITE: 3639 generate_fb_write(inst); 3640 break; 3641 default: 3642 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 3643 
_mesa_problem(ctx, "Unsupported opcode `%s' in FS", 3644 brw_opcodes[inst->opcode].name); 3645 } else { 3646 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 3647 } 3648 this->fail = true; 3649 } 3650 3651 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3652 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 3653 if (0) { 3654 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 3655 ((uint32_t *)&p->store[i])[3], 3656 ((uint32_t *)&p->store[i])[2], 3657 ((uint32_t *)&p->store[i])[1], 3658 ((uint32_t *)&p->store[i])[0]); 3659 } 3660 brw_disasm(stdout, &p->store[i], intel->gen); 3661 } 3662 } 3663 3664 last_native_inst = p->nr_insn; 3665 } 3666 3667 ralloc_free(if_stack); 3668 ralloc_free(loop_stack); 3669 ralloc_free(if_depth_in_loop); 3670 3671 brw_set_uip_jip(p); 3672 3673 /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS 3674 * emit issues, it doesn't get the jump distances into the output, 3675 * which is often something we want to debug. So this is here in 3676 * case you're doing that. 3677 */ 3678 if (0) { 3679 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3680 for (unsigned int i = 0; i < p->nr_insn; i++) { 3681 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 3682 ((uint32_t *)&p->store[i])[3], 3683 ((uint32_t *)&p->store[i])[2], 3684 ((uint32_t *)&p->store[i])[1], 3685 ((uint32_t *)&p->store[i])[0]); 3686 brw_disasm(stdout, &p->store[i], intel->gen); 3687 } 3688 } 3689 } 3690} 3691 3692GLboolean 3693brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 3694{ 3695 struct intel_context *intel = &brw->intel; 3696 struct gl_context *ctx = &intel->ctx; 3697 struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; 3698 3699 if (!prog) 3700 return GL_FALSE; 3701 3702 struct brw_shader *shader = 3703 (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 3704 if (!shader) 3705 return GL_FALSE; 3706 3707 /* We always use 8-wide mode, at least for now. For one, flow 3708 * control only works in 8-wide. Also, when we're fragment shader 3709 * bound, we're almost always under register pressure as well, so 3710 * 8-wide would save us from the performance cliff of spilling 3711 * regs. 3712 */ 3713 c->dispatch_width = 8; 3714 3715 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3716 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 3717 _mesa_print_ir(shader->ir, NULL); 3718 printf("\n"); 3719 } 3720 3721 /* Now the main event: Visit the shader IR and generate our FS IR for it. 3722 */ 3723 fs_visitor v(c, shader); 3724 3725 if (0) { 3726 v.emit_dummy_fs(); 3727 } else { 3728 v.calculate_urb_setup(); 3729 if (intel->gen < 6) 3730 v.emit_interpolation_setup_gen4(); 3731 else 3732 v.emit_interpolation_setup_gen6(); 3733 3734 /* Generate FS IR for main(). (the visitor only descends into 3735 * functions called "main"). 
3736 */ 3737 foreach_iter(exec_list_iterator, iter, *shader->ir) { 3738 ir_instruction *ir = (ir_instruction *)iter.get(); 3739 v.base_ir = ir; 3740 ir->accept(&v); 3741 } 3742 3743 v.emit_fb_writes(); 3744 3745 v.split_virtual_grfs(); 3746 3747 v.setup_paramvalues_refs(); 3748 v.setup_pull_constants(); 3749 3750 bool progress; 3751 do { 3752 progress = false; 3753 3754 progress = v.remove_duplicate_mrf_writes() || progress; 3755 3756 progress = v.propagate_constants() || progress; 3757 progress = v.register_coalesce() || progress; 3758 progress = v.compute_to_mrf() || progress; 3759 progress = v.dead_code_eliminate() || progress; 3760 } while (progress); 3761 3762 v.schedule_instructions(); 3763 3764 v.assign_curb_setup(); 3765 v.assign_urb_setup(); 3766 3767 if (0) { 3768 /* Debug of register spilling: Go spill everything. */ 3769 int virtual_grf_count = v.virtual_grf_next; 3770 for (int i = 1; i < virtual_grf_count; i++) { 3771 v.spill_reg(i); 3772 } 3773 } 3774 3775 if (0) 3776 v.assign_regs_trivial(); 3777 else { 3778 while (!v.assign_regs()) { 3779 if (v.fail) 3780 break; 3781 } 3782 } 3783 } 3784 3785 if (!v.fail) 3786 v.generate_code(); 3787 3788 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */ 3789 3790 if (v.fail) 3791 return GL_FALSE; 3792 3793 c->prog_data.total_grf = v.grf_used; 3794 3795 return GL_TRUE; 3796} 3797