brw_fs.cpp revision 2279156fe7ac9718533b8b0de90ae96100486680
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44} 45#include "brw_fs.h" 46#include "../glsl/glsl_types.h" 47#include "../glsl/ir_optimization.h" 48#include "../glsl/ir_print_visitor.h" 49 50#define MAX_INSTRUCTION (1 << 30) 51static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 52 53struct gl_shader * 54brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 55{ 56 struct brw_shader *shader; 57 58 shader = rzalloc(NULL, struct brw_shader); 59 if (shader) { 60 shader->base.Type = type; 61 shader->base.Name = name; 62 _mesa_init_shader(ctx, &shader->base); 63 } 64 65 return &shader->base; 66} 67 68struct gl_shader_program * 69brw_new_shader_program(struct gl_context *ctx, GLuint name) 70{ 71 struct brw_shader_program *prog; 72 prog = rzalloc(NULL, struct brw_shader_program); 73 if (prog) { 74 prog->base.Name = name; 75 _mesa_init_shader_program(ctx, &prog->base); 76 } 77 return &prog->base; 78} 79 80GLboolean 81brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) 82{ 83 if (!_mesa_ir_compile_shader(ctx, shader)) 84 return GL_FALSE; 85 86 return GL_TRUE; 87} 88 89GLboolean 90brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 91{ 92 struct brw_context *brw = brw_context(ctx); 93 struct intel_context *intel = &brw->intel; 94 95 struct brw_shader *shader = 96 (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 97 if (shader != NULL) { 98 void *mem_ctx = ralloc_context(NULL); 99 bool progress; 100 101 if (shader->ir) 102 ralloc_free(shader->ir); 103 shader->ir = new(shader) 
exec_list; 104 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 105 106 do_mat_op_to_vec(shader->ir); 107 lower_instructions(shader->ir, 108 MOD_TO_FRACT | 109 DIV_TO_MUL_RCP | 110 SUB_TO_ADD_NEG | 111 EXP_TO_EXP2 | 112 LOG_TO_LOG2); 113 114 /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, 115 * if-statements need to be flattened. 116 */ 117 if (intel->gen < 6) 118 lower_if_to_cond_assign(shader->ir, 16); 119 120 do_lower_texture_projection(shader->ir); 121 do_vec_index_to_cond_assign(shader->ir); 122 brw_do_cubemap_normalize(shader->ir); 123 lower_noise(shader->ir); 124 lower_quadop_vector(shader->ir, false); 125 lower_variable_index_to_cond_assign(shader->ir, 126 GL_TRUE, /* input */ 127 GL_TRUE, /* output */ 128 GL_TRUE, /* temp */ 129 GL_TRUE /* uniform */ 130 ); 131 132 do { 133 progress = false; 134 135 brw_do_channel_expressions(shader->ir); 136 brw_do_vector_splitting(shader->ir); 137 138 progress = do_lower_jumps(shader->ir, true, true, 139 true, /* main return */ 140 false, /* continue */ 141 false /* loops */ 142 ) || progress; 143 144 progress = do_common_optimization(shader->ir, true, 32) || progress; 145 } while (progress); 146 147 validate_ir_tree(shader->ir); 148 149 reparent_ir(shader->ir, shader->ir); 150 ralloc_free(mem_ctx); 151 } 152 153 if (!_mesa_ir_link_shader(ctx, prog)) 154 return GL_FALSE; 155 156 return GL_TRUE; 157} 158 159static int 160type_size(const struct glsl_type *type) 161{ 162 unsigned int size, i; 163 164 switch (type->base_type) { 165 case GLSL_TYPE_UINT: 166 case GLSL_TYPE_INT: 167 case GLSL_TYPE_FLOAT: 168 case GLSL_TYPE_BOOL: 169 return type->components(); 170 case GLSL_TYPE_ARRAY: 171 return type_size(type->fields.array) * type->length; 172 case GLSL_TYPE_STRUCT: 173 size = 0; 174 for (i = 0; i < type->length; i++) { 175 size += type_size(type->fields.structure[i].type); 176 } 177 return size; 178 case GLSL_TYPE_SAMPLER: 179 /* Samplers take up no register space, since they're baked in at 180 * link 
time. 181 */ 182 return 0; 183 default: 184 assert(!"not reached"); 185 return 0; 186 } 187} 188 189/** 190 * Returns how many MRFs an FS opcode will write over. 191 * 192 * Note that this is not the 0 or 1 implied writes in an actual gen 193 * instruction -- the FS opcodes often generate MOVs in addition. 194 */ 195int 196fs_visitor::implied_mrf_writes(fs_inst *inst) 197{ 198 if (inst->mlen == 0) 199 return 0; 200 201 switch (inst->opcode) { 202 case FS_OPCODE_RCP: 203 case FS_OPCODE_RSQ: 204 case FS_OPCODE_SQRT: 205 case FS_OPCODE_EXP2: 206 case FS_OPCODE_LOG2: 207 case FS_OPCODE_SIN: 208 case FS_OPCODE_COS: 209 return 1; 210 case FS_OPCODE_POW: 211 return 2; 212 case FS_OPCODE_TEX: 213 case FS_OPCODE_TXB: 214 case FS_OPCODE_TXD: 215 case FS_OPCODE_TXL: 216 return 1; 217 case FS_OPCODE_FB_WRITE: 218 return 2; 219 case FS_OPCODE_PULL_CONSTANT_LOAD: 220 case FS_OPCODE_UNSPILL: 221 return 1; 222 case FS_OPCODE_SPILL: 223 return 2; 224 default: 225 assert(!"not reached"); 226 return inst->mlen; 227 } 228} 229 230int 231fs_visitor::virtual_grf_alloc(int size) 232{ 233 if (virtual_grf_array_size <= virtual_grf_next) { 234 if (virtual_grf_array_size == 0) 235 virtual_grf_array_size = 16; 236 else 237 virtual_grf_array_size *= 2; 238 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, 239 virtual_grf_array_size); 240 241 /* This slot is always unused. */ 242 virtual_grf_sizes[0] = 0; 243 } 244 virtual_grf_sizes[virtual_grf_next] = size; 245 return virtual_grf_next++; 246} 247 248/** Fixed HW reg constructor. */ 249fs_reg::fs_reg(enum register_file file, int hw_reg) 250{ 251 init(); 252 this->file = file; 253 this->hw_reg = hw_reg; 254 this->type = BRW_REGISTER_TYPE_F; 255} 256 257/** Fixed HW reg constructor. 
*/ 258fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) 259{ 260 init(); 261 this->file = file; 262 this->hw_reg = hw_reg; 263 this->type = type; 264} 265 266int 267brw_type_for_base_type(const struct glsl_type *type) 268{ 269 switch (type->base_type) { 270 case GLSL_TYPE_FLOAT: 271 return BRW_REGISTER_TYPE_F; 272 case GLSL_TYPE_INT: 273 case GLSL_TYPE_BOOL: 274 return BRW_REGISTER_TYPE_D; 275 case GLSL_TYPE_UINT: 276 return BRW_REGISTER_TYPE_UD; 277 case GLSL_TYPE_ARRAY: 278 case GLSL_TYPE_STRUCT: 279 case GLSL_TYPE_SAMPLER: 280 /* These should be overridden with the type of the member when 281 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 282 * way to trip up if we don't. 283 */ 284 return BRW_REGISTER_TYPE_UD; 285 default: 286 assert(!"not reached"); 287 return BRW_REGISTER_TYPE_F; 288 } 289} 290 291/** Automatic reg constructor. */ 292fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 293{ 294 init(); 295 296 this->file = GRF; 297 this->reg = v->virtual_grf_alloc(type_size(type)); 298 this->reg_offset = 0; 299 this->type = brw_type_for_base_type(type); 300} 301 302fs_reg * 303fs_visitor::variable_storage(ir_variable *var) 304{ 305 return (fs_reg *)hash_table_find(this->variable_ht, var); 306} 307 308/* Our support for uniforms is piggy-backed on the struct 309 * gl_fragment_program, because that's where the values actually 310 * get stored, rather than in some global gl_shader_program uniform 311 * store. 
312 */ 313int 314fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 315{ 316 unsigned int offset = 0; 317 318 if (type->is_matrix()) { 319 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 320 type->vector_elements, 321 1); 322 323 for (unsigned int i = 0; i < type->matrix_columns; i++) { 324 offset += setup_uniform_values(loc + offset, column); 325 } 326 327 return offset; 328 } 329 330 switch (type->base_type) { 331 case GLSL_TYPE_FLOAT: 332 case GLSL_TYPE_UINT: 333 case GLSL_TYPE_INT: 334 case GLSL_TYPE_BOOL: 335 for (unsigned int i = 0; i < type->vector_elements; i++) { 336 unsigned int param = c->prog_data.nr_params++; 337 338 assert(param < ARRAY_SIZE(c->prog_data.param)); 339 340 switch (type->base_type) { 341 case GLSL_TYPE_FLOAT: 342 c->prog_data.param_convert[param] = PARAM_NO_CONVERT; 343 break; 344 case GLSL_TYPE_UINT: 345 c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; 346 break; 347 case GLSL_TYPE_INT: 348 c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; 349 break; 350 case GLSL_TYPE_BOOL: 351 c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; 352 break; 353 default: 354 assert(!"not reached"); 355 c->prog_data.param_convert[param] = PARAM_NO_CONVERT; 356 break; 357 } 358 this->param_index[param] = loc; 359 this->param_offset[param] = i; 360 } 361 return 1; 362 363 case GLSL_TYPE_STRUCT: 364 for (unsigned int i = 0; i < type->length; i++) { 365 offset += setup_uniform_values(loc + offset, 366 type->fields.structure[i].type); 367 } 368 return offset; 369 370 case GLSL_TYPE_ARRAY: 371 for (unsigned int i = 0; i < type->length; i++) { 372 offset += setup_uniform_values(loc + offset, type->fields.array); 373 } 374 return offset; 375 376 case GLSL_TYPE_SAMPLER: 377 /* The sampler takes up a slot, but we don't use any values from it. */ 378 return 1; 379 380 default: 381 assert(!"not reached"); 382 return 0; 383 } 384} 385 386 387/* Our support for builtin uniforms is even scarier than non-builtin. 
388 * It sits on top of the PROG_STATE_VAR parameters that are 389 * automatically updated from GL context state. 390 */ 391void 392fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 393{ 394 const struct gl_builtin_uniform_desc *statevar = NULL; 395 396 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 397 statevar = &_mesa_builtin_uniform_desc[i]; 398 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 399 break; 400 } 401 402 if (!statevar->name) { 403 this->fail = true; 404 printf("Failed to find builtin uniform `%s'\n", ir->name); 405 return; 406 } 407 408 int array_count; 409 if (ir->type->is_array()) { 410 array_count = ir->type->length; 411 } else { 412 array_count = 1; 413 } 414 415 for (int a = 0; a < array_count; a++) { 416 for (unsigned int i = 0; i < statevar->num_elements; i++) { 417 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 418 int tokens[STATE_LENGTH]; 419 420 memcpy(tokens, element->tokens, sizeof(element->tokens)); 421 if (ir->type->is_array()) { 422 tokens[1] = a; 423 } 424 425 /* This state reference has already been setup by ir_to_mesa, 426 * but we'll get the same index back here. 427 */ 428 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 429 (gl_state_index *)tokens); 430 431 /* Add each of the unique swizzles of the element as a 432 * parameter. This'll end up matching the expected layout of 433 * the array/matrix/structure we're trying to fill in. 
434 */ 435 int last_swiz = -1; 436 for (unsigned int i = 0; i < 4; i++) { 437 int swiz = GET_SWZ(element->swizzle, i); 438 if (swiz == last_swiz) 439 break; 440 last_swiz = swiz; 441 442 c->prog_data.param_convert[c->prog_data.nr_params] = 443 PARAM_NO_CONVERT; 444 this->param_index[c->prog_data.nr_params] = index; 445 this->param_offset[c->prog_data.nr_params] = swiz; 446 c->prog_data.nr_params++; 447 } 448 } 449 } 450} 451 452fs_reg * 453fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 454{ 455 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 456 fs_reg wpos = *reg; 457 fs_reg neg_y = this->pixel_y; 458 neg_y.negate = true; 459 bool flip = !ir->origin_upper_left ^ c->key.render_to_fbo; 460 461 /* gl_FragCoord.x */ 462 if (ir->pixel_center_integer) { 463 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 464 } else { 465 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 466 } 467 wpos.reg_offset++; 468 469 /* gl_FragCoord.y */ 470 if (!flip && ir->pixel_center_integer) { 471 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 472 } else { 473 fs_reg pixel_y = this->pixel_y; 474 float offset = (ir->pixel_center_integer ? 
0.0 : 0.5); 475 476 if (flip) { 477 pixel_y.negate = true; 478 offset += c->key.drawable_height - 1.0; 479 } 480 481 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 482 } 483 wpos.reg_offset++; 484 485 /* gl_FragCoord.z */ 486 if (intel->gen >= 6) { 487 emit(fs_inst(BRW_OPCODE_MOV, wpos, 488 fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); 489 } else { 490 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 491 interp_reg(FRAG_ATTRIB_WPOS, 2))); 492 } 493 wpos.reg_offset++; 494 495 /* gl_FragCoord.w: Already set up in emit_interpolation */ 496 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 497 498 return reg; 499} 500 501fs_reg * 502fs_visitor::emit_general_interpolation(ir_variable *ir) 503{ 504 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 505 /* Interpolation is always in floating point regs. */ 506 reg->type = BRW_REGISTER_TYPE_F; 507 fs_reg attr = *reg; 508 509 unsigned int array_elements; 510 const glsl_type *type; 511 512 if (ir->type->is_array()) { 513 array_elements = ir->type->length; 514 if (array_elements == 0) { 515 this->fail = true; 516 } 517 type = ir->type->fields.array; 518 } else { 519 array_elements = 1; 520 type = ir->type; 521 } 522 523 int location = ir->location; 524 for (unsigned int i = 0; i < array_elements; i++) { 525 for (unsigned int j = 0; j < type->matrix_columns; j++) { 526 if (urb_setup[location] == -1) { 527 /* If there's no incoming setup data for this slot, don't 528 * emit interpolation for it. 529 */ 530 attr.reg_offset += type->vector_elements; 531 location++; 532 continue; 533 } 534 535 if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 || 536 location == FRAG_ATTRIB_COL1)) { 537 /* Constant interpolation (flat shading) case. The SF has 538 * handed us defined values in only the constant offset 539 * field of the setup reg. 
540 */ 541 for (unsigned int c = 0; c < type->vector_elements; c++) { 542 struct brw_reg interp = interp_reg(location, c); 543 interp = suboffset(interp, 3); 544 emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp))); 545 attr.reg_offset++; 546 } 547 } else { 548 /* Perspective interpolation case. */ 549 for (unsigned int c = 0; c < type->vector_elements; c++) { 550 struct brw_reg interp = interp_reg(location, c); 551 emit(fs_inst(FS_OPCODE_LINTERP, 552 attr, 553 this->delta_x, 554 this->delta_y, 555 fs_reg(interp))); 556 attr.reg_offset++; 557 } 558 559 if (intel->gen < 6) { 560 attr.reg_offset -= type->vector_elements; 561 for (unsigned int c = 0; c < type->vector_elements; c++) { 562 emit(fs_inst(BRW_OPCODE_MUL, 563 attr, 564 attr, 565 this->pixel_w)); 566 attr.reg_offset++; 567 } 568 } 569 } 570 location++; 571 } 572 } 573 574 return reg; 575} 576 577fs_reg * 578fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) 579{ 580 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 581 582 /* The frontfacing comes in as a bit in the thread payload. 
*/ 583 if (intel->gen >= 6) { 584 emit(fs_inst(BRW_OPCODE_ASR, 585 *reg, 586 fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), 587 fs_reg(15))); 588 emit(fs_inst(BRW_OPCODE_NOT, 589 *reg, 590 *reg)); 591 emit(fs_inst(BRW_OPCODE_AND, 592 *reg, 593 *reg, 594 fs_reg(1))); 595 } else { 596 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 597 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 598 * us front face 599 */ 600 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 601 *reg, 602 fs_reg(r1_6ud), 603 fs_reg(1u << 31))); 604 inst->conditional_mod = BRW_CONDITIONAL_L; 605 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 606 } 607 608 return reg; 609} 610 611fs_inst * 612fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) 613{ 614 switch (opcode) { 615 case FS_OPCODE_RCP: 616 case FS_OPCODE_RSQ: 617 case FS_OPCODE_SQRT: 618 case FS_OPCODE_EXP2: 619 case FS_OPCODE_LOG2: 620 case FS_OPCODE_SIN: 621 case FS_OPCODE_COS: 622 break; 623 default: 624 assert(!"not reached: bad math opcode"); 625 return NULL; 626 } 627 628 /* Can't do hstride == 0 args to gen6 math, so expand it out. We 629 * might be able to do better by doing execsize = 1 math and then 630 * expanding that result out, but we would need to be careful with 631 * masking. 632 * 633 * The hardware ignores source modifiers (negate and abs) on math 634 * instructions, so we also move to a temp to set those up. 
635 */ 636 if (intel->gen >= 6 && (src.file == UNIFORM || 637 src.abs || 638 src.negate)) { 639 fs_reg expanded = fs_reg(this, glsl_type::float_type); 640 emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); 641 src = expanded; 642 } 643 644 fs_inst *inst = emit(fs_inst(opcode, dst, src)); 645 646 if (intel->gen < 6) { 647 inst->base_mrf = 2; 648 inst->mlen = 1; 649 } 650 651 return inst; 652} 653 654fs_inst * 655fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) 656{ 657 int base_mrf = 2; 658 fs_inst *inst; 659 660 assert(opcode == FS_OPCODE_POW); 661 662 if (intel->gen >= 6) { 663 /* Can't do hstride == 0 args to gen6 math, so expand it out. 664 * 665 * The hardware ignores source modifiers (negate and abs) on math 666 * instructions, so we also move to a temp to set those up. 667 */ 668 if (src0.file == UNIFORM || src0.abs || src0.negate) { 669 fs_reg expanded = fs_reg(this, glsl_type::float_type); 670 emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); 671 src0 = expanded; 672 } 673 674 if (src1.file == UNIFORM || src1.abs || src1.negate) { 675 fs_reg expanded = fs_reg(this, glsl_type::float_type); 676 emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); 677 src1 = expanded; 678 } 679 680 inst = emit(fs_inst(opcode, dst, src0, src1)); 681 } else { 682 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1)); 683 inst = emit(fs_inst(opcode, dst, src0, reg_null_f)); 684 685 inst->base_mrf = base_mrf; 686 inst->mlen = 2; 687 } 688 return inst; 689} 690 691void 692fs_visitor::visit(ir_variable *ir) 693{ 694 fs_reg *reg = NULL; 695 696 if (variable_storage(ir)) 697 return; 698 699 if (strcmp(ir->name, "gl_FragColor") == 0) { 700 this->frag_color = ir; 701 } else if (strcmp(ir->name, "gl_FragData") == 0) { 702 this->frag_data = ir; 703 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 704 this->frag_depth = ir; 705 } 706 707 if (ir->mode == ir_var_in) { 708 if (!strcmp(ir->name, "gl_FragCoord")) { 709 reg = emit_fragcoord_interpolation(ir); 710 } 
else if (!strcmp(ir->name, "gl_FrontFacing")) { 711 reg = emit_frontfacing_interpolation(ir); 712 } else { 713 reg = emit_general_interpolation(ir); 714 } 715 assert(reg); 716 hash_table_insert(this->variable_ht, reg, ir); 717 return; 718 } 719 720 if (ir->mode == ir_var_uniform) { 721 int param_index = c->prog_data.nr_params; 722 723 if (!strncmp(ir->name, "gl_", 3)) { 724 setup_builtin_uniform_values(ir); 725 } else { 726 setup_uniform_values(ir->location, ir->type); 727 } 728 729 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 730 reg->type = brw_type_for_base_type(ir->type); 731 } 732 733 if (!reg) 734 reg = new(this->mem_ctx) fs_reg(this, ir->type); 735 736 hash_table_insert(this->variable_ht, reg, ir); 737} 738 739void 740fs_visitor::visit(ir_dereference_variable *ir) 741{ 742 fs_reg *reg = variable_storage(ir->var); 743 this->result = *reg; 744} 745 746void 747fs_visitor::visit(ir_dereference_record *ir) 748{ 749 const glsl_type *struct_type = ir->record->type; 750 751 ir->record->accept(this); 752 753 unsigned int offset = 0; 754 for (unsigned int i = 0; i < struct_type->length; i++) { 755 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 756 break; 757 offset += type_size(struct_type->fields.structure[i].type); 758 } 759 this->result.reg_offset += offset; 760 this->result.type = brw_type_for_base_type(ir->type); 761} 762 763void 764fs_visitor::visit(ir_dereference_array *ir) 765{ 766 ir_constant *index; 767 int element_size; 768 769 ir->array->accept(this); 770 index = ir->array_index->as_constant(); 771 772 element_size = type_size(ir->type); 773 this->result.type = brw_type_for_base_type(ir->type); 774 775 if (index) { 776 assert(this->result.file == UNIFORM || 777 (this->result.file == GRF && 778 this->result.reg != 0)); 779 this->result.reg_offset += index->value.i[0] * element_size; 780 } else { 781 assert(!"FINISHME: non-constant array element"); 782 } 783} 784 785/* Instruction selection: Produce a MOV.sat instead of 786 * 
MIN(MAX(val, 0), 1) when possible. 787 */ 788bool 789fs_visitor::try_emit_saturate(ir_expression *ir) 790{ 791 ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); 792 793 if (!sat_val) 794 return false; 795 796 sat_val->accept(this); 797 fs_reg src = this->result; 798 799 this->result = fs_reg(this, ir->type); 800 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, src)); 801 inst->saturate = true; 802 803 return true; 804} 805 806static uint32_t 807brw_conditional_for_comparison(unsigned int op) 808{ 809 switch (op) { 810 case ir_binop_less: 811 return BRW_CONDITIONAL_L; 812 case ir_binop_greater: 813 return BRW_CONDITIONAL_G; 814 case ir_binop_lequal: 815 return BRW_CONDITIONAL_LE; 816 case ir_binop_gequal: 817 return BRW_CONDITIONAL_GE; 818 case ir_binop_equal: 819 case ir_binop_all_equal: /* same as equal for scalars */ 820 return BRW_CONDITIONAL_Z; 821 case ir_binop_nequal: 822 case ir_binop_any_nequal: /* same as nequal for scalars */ 823 return BRW_CONDITIONAL_NZ; 824 default: 825 assert(!"not reached: bad operation for comparison"); 826 return BRW_CONDITIONAL_NZ; 827 } 828} 829 830void 831fs_visitor::visit(ir_expression *ir) 832{ 833 unsigned int operand; 834 fs_reg op[2], temp; 835 fs_inst *inst; 836 837 assert(ir->get_num_operands() <= 2); 838 839 if (try_emit_saturate(ir)) 840 return; 841 842 for (operand = 0; operand < ir->get_num_operands(); operand++) { 843 ir->operands[operand]->accept(this); 844 if (this->result.file == BAD_FILE) { 845 ir_print_visitor v; 846 printf("Failed to get tree for expression operand:\n"); 847 ir->operands[operand]->accept(&v); 848 this->fail = true; 849 } 850 op[operand] = this->result; 851 852 /* Matrix expression operands should have been broken down to vector 853 * operations already. 854 */ 855 assert(!ir->operands[operand]->type->is_matrix()); 856 /* And then those vector operands should have been broken down to scalar. 
857 */ 858 assert(!ir->operands[operand]->type->is_vector()); 859 } 860 861 /* Storage for our result. If our result goes into an assignment, it will 862 * just get copy-propagated out, so no worries. 863 */ 864 this->result = fs_reg(this, ir->type); 865 866 switch (ir->operation) { 867 case ir_unop_logic_not: 868 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 869 * ones complement of the whole register, not just bit 0. 870 */ 871 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1))); 872 break; 873 case ir_unop_neg: 874 op[0].negate = !op[0].negate; 875 this->result = op[0]; 876 break; 877 case ir_unop_abs: 878 op[0].abs = true; 879 op[0].negate = false; 880 this->result = op[0]; 881 break; 882 case ir_unop_sign: 883 temp = fs_reg(this, ir->type); 884 885 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 886 887 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 888 inst->conditional_mod = BRW_CONDITIONAL_G; 889 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 890 inst->predicated = true; 891 892 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 893 inst->conditional_mod = BRW_CONDITIONAL_L; 894 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 895 inst->predicated = true; 896 897 break; 898 case ir_unop_rcp: 899 emit_math(FS_OPCODE_RCP, this->result, op[0]); 900 break; 901 902 case ir_unop_exp2: 903 emit_math(FS_OPCODE_EXP2, this->result, op[0]); 904 break; 905 case ir_unop_log2: 906 emit_math(FS_OPCODE_LOG2, this->result, op[0]); 907 break; 908 case ir_unop_exp: 909 case ir_unop_log: 910 assert(!"not reached: should be handled by ir_explog_to_explog2"); 911 break; 912 case ir_unop_sin: 913 case ir_unop_sin_reduced: 914 emit_math(FS_OPCODE_SIN, this->result, op[0]); 915 break; 916 case ir_unop_cos: 917 case ir_unop_cos_reduced: 918 emit_math(FS_OPCODE_COS, this->result, op[0]); 919 break; 920 921 case ir_unop_dFdx: 922 emit(fs_inst(FS_OPCODE_DDX, 
this->result, op[0])); 923 break; 924 case ir_unop_dFdy: 925 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 926 break; 927 928 case ir_binop_add: 929 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 930 break; 931 case ir_binop_sub: 932 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 933 break; 934 935 case ir_binop_mul: 936 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 937 break; 938 case ir_binop_div: 939 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 940 break; 941 case ir_binop_mod: 942 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 943 break; 944 945 case ir_binop_less: 946 case ir_binop_greater: 947 case ir_binop_lequal: 948 case ir_binop_gequal: 949 case ir_binop_equal: 950 case ir_binop_all_equal: 951 case ir_binop_nequal: 952 case ir_binop_any_nequal: 953 temp = this->result; 954 /* original gen4 does implicit conversion before comparison. */ 955 if (intel->gen < 5) 956 temp.type = op[0].type; 957 958 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1])); 959 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 960 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 961 break; 962 963 case ir_binop_logic_xor: 964 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 965 break; 966 967 case ir_binop_logic_or: 968 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 969 break; 970 971 case ir_binop_logic_and: 972 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 973 break; 974 975 case ir_binop_dot: 976 case ir_unop_any: 977 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 978 break; 979 980 case ir_unop_noise: 981 assert(!"not reached: should be handled by lower_noise"); 982 break; 983 984 case ir_quadop_vector: 985 assert(!"not reached: should be handled by lower_quadop_vector"); 986 break; 987 988 case ir_unop_sqrt: 989 emit_math(FS_OPCODE_SQRT, this->result, op[0]); 990 break; 991 992 case 
ir_unop_rsq: 993 emit_math(FS_OPCODE_RSQ, this->result, op[0]); 994 break; 995 996 case ir_unop_i2f: 997 case ir_unop_b2f: 998 case ir_unop_b2i: 999 case ir_unop_f2i: 1000 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 1001 break; 1002 case ir_unop_f2b: 1003 case ir_unop_i2b: 1004 temp = this->result; 1005 /* original gen4 does implicit conversion before comparison. */ 1006 if (intel->gen < 5) 1007 temp.type = op[0].type; 1008 1009 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f))); 1010 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1011 inst = emit(fs_inst(BRW_OPCODE_AND, this->result, 1012 this->result, fs_reg(1))); 1013 break; 1014 1015 case ir_unop_trunc: 1016 emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0])); 1017 break; 1018 case ir_unop_ceil: 1019 op[0].negate = !op[0].negate; 1020 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1021 this->result.negate = true; 1022 break; 1023 case ir_unop_floor: 1024 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1025 break; 1026 case ir_unop_fract: 1027 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 1028 break; 1029 case ir_unop_round_even: 1030 emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0])); 1031 break; 1032 1033 case ir_binop_min: 1034 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1035 inst->conditional_mod = BRW_CONDITIONAL_L; 1036 1037 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1038 inst->predicated = true; 1039 break; 1040 case ir_binop_max: 1041 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1042 inst->conditional_mod = BRW_CONDITIONAL_G; 1043 1044 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1045 inst->predicated = true; 1046 break; 1047 1048 case ir_binop_pow: 1049 emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); 1050 break; 1051 1052 case ir_unop_bit_not: 1053 inst = emit(fs_inst(BRW_OPCODE_NOT, this->result, op[0])); 1054 break; 1055 case ir_binop_bit_and: 1056 inst 
= emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 1057 break; 1058 case ir_binop_bit_xor: 1059 inst = emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 1060 break; 1061 case ir_binop_bit_or: 1062 inst = emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 1063 break; 1064 1065 case ir_unop_u2f: 1066 case ir_binop_lshift: 1067 case ir_binop_rshift: 1068 assert(!"GLSL 1.30 features unsupported"); 1069 break; 1070 } 1071} 1072 1073void 1074fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 1075 const glsl_type *type, bool predicated) 1076{ 1077 switch (type->base_type) { 1078 case GLSL_TYPE_FLOAT: 1079 case GLSL_TYPE_UINT: 1080 case GLSL_TYPE_INT: 1081 case GLSL_TYPE_BOOL: 1082 for (unsigned int i = 0; i < type->components(); i++) { 1083 l.type = brw_type_for_base_type(type); 1084 r.type = brw_type_for_base_type(type); 1085 1086 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1087 inst->predicated = predicated; 1088 1089 l.reg_offset++; 1090 r.reg_offset++; 1091 } 1092 break; 1093 case GLSL_TYPE_ARRAY: 1094 for (unsigned int i = 0; i < type->length; i++) { 1095 emit_assignment_writes(l, r, type->fields.array, predicated); 1096 } 1097 break; 1098 1099 case GLSL_TYPE_STRUCT: 1100 for (unsigned int i = 0; i < type->length; i++) { 1101 emit_assignment_writes(l, r, type->fields.structure[i].type, 1102 predicated); 1103 } 1104 break; 1105 1106 case GLSL_TYPE_SAMPLER: 1107 break; 1108 1109 default: 1110 assert(!"not reached"); 1111 break; 1112 } 1113} 1114 1115void 1116fs_visitor::visit(ir_assignment *ir) 1117{ 1118 struct fs_reg l, r; 1119 fs_inst *inst; 1120 1121 /* FINISHME: arrays on the lhs */ 1122 ir->lhs->accept(this); 1123 l = this->result; 1124 1125 ir->rhs->accept(this); 1126 r = this->result; 1127 1128 assert(l.file != BAD_FILE); 1129 assert(r.file != BAD_FILE); 1130 1131 if (ir->condition) { 1132 emit_bool_to_cond_code(ir->condition); 1133 } 1134 1135 if (ir->lhs->type->is_scalar() || 1136 ir->lhs->type->is_vector()) { 1137 for 
(int i = 0; i < ir->lhs->type->vector_elements; i++) { 1138 if (ir->write_mask & (1 << i)) { 1139 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1140 if (ir->condition) 1141 inst->predicated = true; 1142 r.reg_offset++; 1143 } 1144 l.reg_offset++; 1145 } 1146 } else { 1147 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 1148 } 1149} 1150 1151fs_inst * 1152fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1153{ 1154 int mlen; 1155 int base_mrf = 1; 1156 bool simd16 = false; 1157 fs_reg orig_dst; 1158 1159 /* g0 header. */ 1160 mlen = 1; 1161 1162 if (ir->shadow_comparitor) { 1163 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1164 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1165 coordinate)); 1166 coordinate.reg_offset++; 1167 } 1168 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1169 mlen += 3; 1170 1171 if (ir->op == ir_tex) { 1172 /* There's no plain shadow compare message, so we use shadow 1173 * compare with a bias of 0.0. 1174 */ 1175 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1176 fs_reg(0.0f))); 1177 mlen++; 1178 } else if (ir->op == ir_txb) { 1179 ir->lod_info.bias->accept(this); 1180 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1181 this->result)); 1182 mlen++; 1183 } else { 1184 assert(ir->op == ir_txl); 1185 ir->lod_info.lod->accept(this); 1186 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1187 this->result)); 1188 mlen++; 1189 } 1190 1191 ir->shadow_comparitor->accept(this); 1192 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1193 mlen++; 1194 } else if (ir->op == ir_tex) { 1195 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1196 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1197 coordinate)); 1198 coordinate.reg_offset++; 1199 } 1200 /* gen4's SIMD8 sampler always has the slots for u,v,r present. 
*/ 1201 mlen += 3; 1202 } else if (ir->op == ir_txd) { 1203 assert(!"TXD isn't supported on gen4 yet."); 1204 } else { 1205 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 1206 * instructions. We'll need to do SIMD16 here. 1207 */ 1208 assert(ir->op == ir_txb || ir->op == ir_txl); 1209 1210 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1211 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), 1212 coordinate)); 1213 coordinate.reg_offset++; 1214 } 1215 1216 /* lod/bias appears after u/v/r. */ 1217 mlen += 6; 1218 1219 if (ir->op == ir_txb) { 1220 ir->lod_info.bias->accept(this); 1221 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1222 this->result)); 1223 mlen++; 1224 } else { 1225 ir->lod_info.lod->accept(this); 1226 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1227 this->result)); 1228 mlen++; 1229 } 1230 1231 /* The unused upper half. */ 1232 mlen++; 1233 1234 /* Now, since we're doing simd16, the return is 2 interleaved 1235 * vec4s where the odd-indexed ones are junk. We'll need to move 1236 * this weirdness around to the expected layout. 
1237 */ 1238 simd16 = true; 1239 orig_dst = dst; 1240 dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 1241 2)); 1242 dst.type = BRW_REGISTER_TYPE_F; 1243 } 1244 1245 fs_inst *inst = NULL; 1246 switch (ir->op) { 1247 case ir_tex: 1248 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1249 break; 1250 case ir_txb: 1251 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1252 break; 1253 case ir_txl: 1254 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1255 break; 1256 case ir_txd: 1257 inst = emit(fs_inst(FS_OPCODE_TXD, dst)); 1258 break; 1259 case ir_txf: 1260 assert(!"GLSL 1.30 features unsupported"); 1261 break; 1262 } 1263 inst->base_mrf = base_mrf; 1264 inst->mlen = mlen; 1265 1266 if (simd16) { 1267 for (int i = 0; i < 4; i++) { 1268 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst)); 1269 orig_dst.reg_offset++; 1270 dst.reg_offset += 2; 1271 } 1272 } 1273 1274 return inst; 1275} 1276 1277fs_inst * 1278fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1279{ 1280 /* gen5's SIMD8 sampler has slots for u, v, r, array index, then 1281 * optional parameters like shadow comparitor or LOD bias. If 1282 * optional parameters aren't present, those base slots are 1283 * optional and don't need to be included in the message. 1284 * 1285 * We don't fill in the unnecessary slots regardless, which may 1286 * look surprising in the disassembly. 1287 */ 1288 int mlen = 1; /* g0 header always present. 
*/ 1289 int base_mrf = 1; 1290 1291 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1292 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1293 coordinate)); 1294 coordinate.reg_offset++; 1295 } 1296 mlen += ir->coordinate->type->vector_elements; 1297 1298 if (ir->shadow_comparitor) { 1299 mlen = MAX2(mlen, 5); 1300 1301 ir->shadow_comparitor->accept(this); 1302 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1303 mlen++; 1304 } 1305 1306 fs_inst *inst = NULL; 1307 switch (ir->op) { 1308 case ir_tex: 1309 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1310 break; 1311 case ir_txb: 1312 ir->lod_info.bias->accept(this); 1313 mlen = MAX2(mlen, 5); 1314 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1315 mlen++; 1316 1317 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1318 break; 1319 case ir_txl: 1320 ir->lod_info.lod->accept(this); 1321 mlen = MAX2(mlen, 5); 1322 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1323 mlen++; 1324 1325 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1326 break; 1327 case ir_txd: 1328 case ir_txf: 1329 assert(!"GLSL 1.30 features unsupported"); 1330 break; 1331 } 1332 inst->base_mrf = base_mrf; 1333 inst->mlen = mlen; 1334 1335 return inst; 1336} 1337 1338void 1339fs_visitor::visit(ir_texture *ir) 1340{ 1341 int sampler; 1342 fs_inst *inst = NULL; 1343 1344 ir->coordinate->accept(this); 1345 fs_reg coordinate = this->result; 1346 1347 if (ir->offset != NULL) { 1348 ir_constant *offset = ir->offset->as_constant(); 1349 assert(offset != NULL); 1350 1351 signed char offsets[3]; 1352 for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) 1353 offsets[i] = (signed char) offset->value.i[i]; 1354 1355 /* Combine all three offsets into a single unsigned dword: 1356 * 1357 * bits 11:8 - U Offset (X component) 1358 * bits 7:4 - V Offset (Y component) 1359 * bits 3:0 - R Offset (Z component) 1360 */ 1361 unsigned offset_bits = 0; 1362 for 
(unsigned i = 0; i < ir->offset->type->vector_elements; i++) { 1363 const unsigned shift = 4 * (2 - i); 1364 offset_bits |= (offsets[i] << shift) & (0xF << shift); 1365 } 1366 1367 /* Explicitly set up the message header by copying g0 to msg reg m1. */ 1368 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), 1369 fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD))); 1370 1371 /* Then set the offset bits in DWord 2 of the message header. */ 1372 emit(fs_inst(BRW_OPCODE_MOV, 1373 fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), 1374 BRW_REGISTER_TYPE_UD)), 1375 fs_reg(brw_imm_uw(offset_bits)))); 1376 } 1377 1378 /* Should be lowered by do_lower_texture_projection */ 1379 assert(!ir->projector); 1380 1381 sampler = _mesa_get_sampler_uniform_value(ir->sampler, 1382 ctx->Shader.CurrentFragmentProgram, 1383 &brw->fragment_program->Base); 1384 sampler = c->fp->program.Base.SamplerUnits[sampler]; 1385 1386 /* The 965 requires the EU to do the normalization of GL rectangle 1387 * texture coordinates. We use the program parameter state 1388 * tracking to get the scaling factor. 
1389 */ 1390 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1391 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1392 int tokens[STATE_LENGTH] = { 1393 STATE_INTERNAL, 1394 STATE_TEXRECT_SCALE, 1395 sampler, 1396 0, 1397 0 1398 }; 1399 1400 c->prog_data.param_convert[c->prog_data.nr_params] = 1401 PARAM_NO_CONVERT; 1402 c->prog_data.param_convert[c->prog_data.nr_params + 1] = 1403 PARAM_NO_CONVERT; 1404 1405 fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1406 fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1407 GLuint index = _mesa_add_state_reference(params, 1408 (gl_state_index *)tokens); 1409 1410 this->param_index[c->prog_data.nr_params] = index; 1411 this->param_offset[c->prog_data.nr_params] = 0; 1412 c->prog_data.nr_params++; 1413 this->param_index[c->prog_data.nr_params] = index; 1414 this->param_offset[c->prog_data.nr_params] = 1; 1415 c->prog_data.nr_params++; 1416 1417 fs_reg dst = fs_reg(this, ir->coordinate->type); 1418 fs_reg src = coordinate; 1419 coordinate = dst; 1420 1421 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x)); 1422 dst.reg_offset++; 1423 src.reg_offset++; 1424 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y)); 1425 } 1426 1427 /* Writemasking doesn't eliminate channels on SIMD8 texture 1428 * samples, so don't worry about them. 1429 */ 1430 fs_reg dst = fs_reg(this, glsl_type::vec4_type); 1431 1432 if (intel->gen < 5) { 1433 inst = emit_texture_gen4(ir, dst, coordinate); 1434 } else { 1435 inst = emit_texture_gen5(ir, dst, coordinate); 1436 } 1437 1438 /* If there's an offset, we already set up m1. To avoid the implied move, 1439 * use the null register. Otherwise, we want an implied move from g0. 
1440 */ 1441 if (ir->offset != NULL) 1442 inst->src[0] = fs_reg(brw_null_reg()); 1443 else 1444 inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 1445 1446 inst->sampler = sampler; 1447 1448 this->result = dst; 1449 1450 if (ir->shadow_comparitor) 1451 inst->shadow_compare = true; 1452 1453 if (ir->type == glsl_type::float_type) { 1454 /* Ignore DEPTH_TEXTURE_MODE swizzling. */ 1455 assert(ir->sampler->type->sampler_shadow); 1456 } else if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { 1457 fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); 1458 1459 for (int i = 0; i < 4; i++) { 1460 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1461 fs_reg l = swizzle_dst; 1462 l.reg_offset += i; 1463 1464 if (swiz == SWIZZLE_ZERO) { 1465 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f))); 1466 } else if (swiz == SWIZZLE_ONE) { 1467 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f))); 1468 } else { 1469 fs_reg r = dst; 1470 r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1471 emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1472 } 1473 } 1474 this->result = swizzle_dst; 1475 } 1476} 1477 1478void 1479fs_visitor::visit(ir_swizzle *ir) 1480{ 1481 ir->val->accept(this); 1482 fs_reg val = this->result; 1483 1484 if (ir->type->vector_elements == 1) { 1485 this->result.reg_offset += ir->mask.x; 1486 return; 1487 } 1488 1489 fs_reg result = fs_reg(this, ir->type); 1490 this->result = result; 1491 1492 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1493 fs_reg channel = val; 1494 int swiz = 0; 1495 1496 switch (i) { 1497 case 0: 1498 swiz = ir->mask.x; 1499 break; 1500 case 1: 1501 swiz = ir->mask.y; 1502 break; 1503 case 2: 1504 swiz = ir->mask.z; 1505 break; 1506 case 3: 1507 swiz = ir->mask.w; 1508 break; 1509 } 1510 1511 channel.reg_offset += swiz; 1512 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 1513 result.reg_offset++; 1514 } 1515} 1516 1517void 1518fs_visitor::visit(ir_discard *ir) 1519{ 1520 fs_reg temp = 
fs_reg(this, glsl_type::uint_type); 1521 1522 assert(ir->condition == NULL); /* FINISHME */ 1523 1524 emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null_d)); 1525 emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null_d, temp)); 1526 kill_emitted = true; 1527} 1528 1529void 1530fs_visitor::visit(ir_constant *ir) 1531{ 1532 /* Set this->result to reg at the bottom of the function because some code 1533 * paths will cause this visitor to be applied to other fields. This will 1534 * cause the value stored in this->result to be modified. 1535 * 1536 * Make reg constant so that it doesn't get accidentally modified along the 1537 * way. Yes, I actually had this problem. :( 1538 */ 1539 const fs_reg reg(this, ir->type); 1540 fs_reg dst_reg = reg; 1541 1542 if (ir->type->is_array()) { 1543 const unsigned size = type_size(ir->type->fields.array); 1544 1545 for (unsigned i = 0; i < ir->type->length; i++) { 1546 ir->array_elements[i]->accept(this); 1547 fs_reg src_reg = this->result; 1548 1549 dst_reg.type = src_reg.type; 1550 for (unsigned j = 0; j < size; j++) { 1551 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); 1552 src_reg.reg_offset++; 1553 dst_reg.reg_offset++; 1554 } 1555 } 1556 } else if (ir->type->is_record()) { 1557 foreach_list(node, &ir->components) { 1558 ir_instruction *const field = (ir_instruction *) node; 1559 const unsigned size = type_size(field->type); 1560 1561 field->accept(this); 1562 fs_reg src_reg = this->result; 1563 1564 dst_reg.type = src_reg.type; 1565 for (unsigned j = 0; j < size; j++) { 1566 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); 1567 src_reg.reg_offset++; 1568 dst_reg.reg_offset++; 1569 } 1570 } 1571 } else { 1572 const unsigned size = type_size(ir->type); 1573 1574 for (unsigned i = 0; i < size; i++) { 1575 switch (ir->type->base_type) { 1576 case GLSL_TYPE_FLOAT: 1577 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]))); 1578 break; 1579 case GLSL_TYPE_UINT: 1580 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]))); 
1581 break; 1582 case GLSL_TYPE_INT: 1583 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]))); 1584 break; 1585 case GLSL_TYPE_BOOL: 1586 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]))); 1587 break; 1588 default: 1589 assert(!"Non-float/uint/int/bool constant"); 1590 } 1591 dst_reg.reg_offset++; 1592 } 1593 } 1594 1595 this->result = reg; 1596} 1597 1598void 1599fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 1600{ 1601 ir_expression *expr = ir->as_expression(); 1602 1603 if (expr) { 1604 fs_reg op[2]; 1605 fs_inst *inst; 1606 1607 assert(expr->get_num_operands() <= 2); 1608 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1609 assert(expr->operands[i]->type->is_scalar()); 1610 1611 expr->operands[i]->accept(this); 1612 op[i] = this->result; 1613 } 1614 1615 switch (expr->operation) { 1616 case ir_unop_logic_not: 1617 inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1))); 1618 inst->conditional_mod = BRW_CONDITIONAL_Z; 1619 break; 1620 1621 case ir_binop_logic_xor: 1622 inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null_d, op[0], op[1])); 1623 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1624 break; 1625 1626 case ir_binop_logic_or: 1627 inst = emit(fs_inst(BRW_OPCODE_OR, reg_null_d, op[0], op[1])); 1628 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1629 break; 1630 1631 case ir_binop_logic_and: 1632 inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], op[1])); 1633 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1634 break; 1635 1636 case ir_unop_f2b: 1637 if (intel->gen >= 6) { 1638 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, 1639 op[0], fs_reg(0.0f))); 1640 } else { 1641 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0])); 1642 } 1643 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1644 break; 1645 1646 case ir_unop_i2b: 1647 if (intel->gen >= 6) { 1648 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0))); 1649 } else { 1650 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); 1651 } 1652 
inst->conditional_mod = BRW_CONDITIONAL_NZ; 1653 break; 1654 1655 case ir_binop_greater: 1656 case ir_binop_gequal: 1657 case ir_binop_less: 1658 case ir_binop_lequal: 1659 case ir_binop_equal: 1660 case ir_binop_all_equal: 1661 case ir_binop_nequal: 1662 case ir_binop_any_nequal: 1663 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1])); 1664 inst->conditional_mod = 1665 brw_conditional_for_comparison(expr->operation); 1666 break; 1667 1668 default: 1669 assert(!"not reached"); 1670 this->fail = true; 1671 break; 1672 } 1673 return; 1674 } 1675 1676 ir->accept(this); 1677 1678 if (intel->gen >= 6) { 1679 fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, 1680 this->result, fs_reg(1))); 1681 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1682 } else { 1683 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, this->result)); 1684 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1685 } 1686} 1687 1688/** 1689 * Emit a gen6 IF statement with the comparison folded into the IF 1690 * instruction. 
1691 */ 1692void 1693fs_visitor::emit_if_gen6(ir_if *ir) 1694{ 1695 ir_expression *expr = ir->condition->as_expression(); 1696 1697 if (expr) { 1698 fs_reg op[2]; 1699 fs_inst *inst; 1700 fs_reg temp; 1701 1702 assert(expr->get_num_operands() <= 2); 1703 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1704 assert(expr->operands[i]->type->is_scalar()); 1705 1706 expr->operands[i]->accept(this); 1707 op[i] = this->result; 1708 } 1709 1710 switch (expr->operation) { 1711 case ir_unop_logic_not: 1712 inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0))); 1713 inst->conditional_mod = BRW_CONDITIONAL_Z; 1714 return; 1715 1716 case ir_binop_logic_xor: 1717 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1718 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1719 return; 1720 1721 case ir_binop_logic_or: 1722 temp = fs_reg(this, glsl_type::bool_type); 1723 emit(fs_inst(BRW_OPCODE_OR, temp, op[0], op[1])); 1724 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); 1725 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1726 return; 1727 1728 case ir_binop_logic_and: 1729 temp = fs_reg(this, glsl_type::bool_type); 1730 emit(fs_inst(BRW_OPCODE_AND, temp, op[0], op[1])); 1731 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); 1732 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1733 return; 1734 1735 case ir_unop_f2b: 1736 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0))); 1737 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1738 return; 1739 1740 case ir_unop_i2b: 1741 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); 1742 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1743 return; 1744 1745 case ir_binop_greater: 1746 case ir_binop_gequal: 1747 case ir_binop_less: 1748 case ir_binop_lequal: 1749 case ir_binop_equal: 1750 case ir_binop_all_equal: 1751 case ir_binop_nequal: 1752 case ir_binop_any_nequal: 1753 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1754 inst->conditional_mod = 1755 
brw_conditional_for_comparison(expr->operation); 1756 return; 1757 default: 1758 assert(!"not reached"); 1759 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); 1760 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1761 this->fail = true; 1762 return; 1763 } 1764 return; 1765 } 1766 1767 ir->condition->accept(this); 1768 1769 fs_inst *inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0))); 1770 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1771} 1772 1773void 1774fs_visitor::visit(ir_if *ir) 1775{ 1776 fs_inst *inst; 1777 1778 /* Don't point the annotation at the if statement, because then it plus 1779 * the then and else blocks get printed. 1780 */ 1781 this->base_ir = ir->condition; 1782 1783 if (intel->gen >= 6) { 1784 emit_if_gen6(ir); 1785 } else { 1786 emit_bool_to_cond_code(ir->condition); 1787 1788 inst = emit(fs_inst(BRW_OPCODE_IF)); 1789 inst->predicated = true; 1790 } 1791 1792 foreach_iter(exec_list_iterator, iter, ir->then_instructions) { 1793 ir_instruction *ir = (ir_instruction *)iter.get(); 1794 this->base_ir = ir; 1795 1796 ir->accept(this); 1797 } 1798 1799 if (!ir->else_instructions.is_empty()) { 1800 emit(fs_inst(BRW_OPCODE_ELSE)); 1801 1802 foreach_iter(exec_list_iterator, iter, ir->else_instructions) { 1803 ir_instruction *ir = (ir_instruction *)iter.get(); 1804 this->base_ir = ir; 1805 1806 ir->accept(this); 1807 } 1808 } 1809 1810 emit(fs_inst(BRW_OPCODE_ENDIF)); 1811} 1812 1813void 1814fs_visitor::visit(ir_loop *ir) 1815{ 1816 fs_reg counter = reg_undef; 1817 1818 if (ir->counter) { 1819 this->base_ir = ir->counter; 1820 ir->counter->accept(this); 1821 counter = *(variable_storage(ir->counter)); 1822 1823 if (ir->from) { 1824 this->base_ir = ir->from; 1825 ir->from->accept(this); 1826 1827 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result)); 1828 } 1829 } 1830 1831 emit(fs_inst(BRW_OPCODE_DO)); 1832 1833 if (ir->to) { 1834 this->base_ir = ir->to; 1835 ir->to->accept(this); 1836 1837 fs_inst *inst = 
emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, 1838 counter, this->result)); 1839 inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); 1840 1841 inst = emit(fs_inst(BRW_OPCODE_BREAK)); 1842 inst->predicated = true; 1843 } 1844 1845 foreach_iter(exec_list_iterator, iter, ir->body_instructions) { 1846 ir_instruction *ir = (ir_instruction *)iter.get(); 1847 1848 this->base_ir = ir; 1849 ir->accept(this); 1850 } 1851 1852 if (ir->increment) { 1853 this->base_ir = ir->increment; 1854 ir->increment->accept(this); 1855 emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result)); 1856 } 1857 1858 emit(fs_inst(BRW_OPCODE_WHILE)); 1859} 1860 1861void 1862fs_visitor::visit(ir_loop_jump *ir) 1863{ 1864 switch (ir->mode) { 1865 case ir_loop_jump::jump_break: 1866 emit(fs_inst(BRW_OPCODE_BREAK)); 1867 break; 1868 case ir_loop_jump::jump_continue: 1869 emit(fs_inst(BRW_OPCODE_CONTINUE)); 1870 break; 1871 } 1872} 1873 1874void 1875fs_visitor::visit(ir_call *ir) 1876{ 1877 assert(!"FINISHME"); 1878} 1879 1880void 1881fs_visitor::visit(ir_return *ir) 1882{ 1883 assert(!"FINISHME"); 1884} 1885 1886void 1887fs_visitor::visit(ir_function *ir) 1888{ 1889 /* Ignore function bodies other than main() -- we shouldn't see calls to 1890 * them since they should all be inlined before we get to ir_to_mesa. 
1891 */ 1892 if (strcmp(ir->name, "main") == 0) { 1893 const ir_function_signature *sig; 1894 exec_list empty; 1895 1896 sig = ir->matching_signature(&empty); 1897 1898 assert(sig); 1899 1900 foreach_iter(exec_list_iterator, iter, sig->body) { 1901 ir_instruction *ir = (ir_instruction *)iter.get(); 1902 this->base_ir = ir; 1903 1904 ir->accept(this); 1905 } 1906 } 1907} 1908 1909void 1910fs_visitor::visit(ir_function_signature *ir) 1911{ 1912 assert(!"not reached"); 1913 (void)ir; 1914} 1915 1916fs_inst * 1917fs_visitor::emit(fs_inst inst) 1918{ 1919 fs_inst *list_inst = new(mem_ctx) fs_inst; 1920 *list_inst = inst; 1921 1922 list_inst->annotation = this->current_annotation; 1923 list_inst->ir = this->base_ir; 1924 1925 this->instructions.push_tail(list_inst); 1926 1927 return list_inst; 1928} 1929 1930/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1931void 1932fs_visitor::emit_dummy_fs() 1933{ 1934 /* Everyone's favorite color. */ 1935 emit(fs_inst(BRW_OPCODE_MOV, 1936 fs_reg(MRF, 2), 1937 fs_reg(1.0f))); 1938 emit(fs_inst(BRW_OPCODE_MOV, 1939 fs_reg(MRF, 3), 1940 fs_reg(0.0f))); 1941 emit(fs_inst(BRW_OPCODE_MOV, 1942 fs_reg(MRF, 4), 1943 fs_reg(1.0f))); 1944 emit(fs_inst(BRW_OPCODE_MOV, 1945 fs_reg(MRF, 5), 1946 fs_reg(0.0f))); 1947 1948 fs_inst *write; 1949 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1950 fs_reg(0), 1951 fs_reg(0))); 1952 write->base_mrf = 0; 1953} 1954 1955/* The register location here is relative to the start of the URB 1956 * data. It will get adjusted to be a real location before 1957 * generate_code() time. 1958 */ 1959struct brw_reg 1960fs_visitor::interp_reg(int location, int channel) 1961{ 1962 int regnr = urb_setup[location] * 2 + channel / 2; 1963 int stride = (channel & 1) * 4; 1964 1965 assert(urb_setup[location] != -1); 1966 1967 return brw_vec1_grf(regnr, stride); 1968} 1969 1970/** Emits the interpolation for the varying inputs. 
*/
void
fs_visitor::emit_interpolation_setup_gen4()
{
   /* g1 viewed as unsigned words; the subspan X/Y origins are read from
    * words 4 and 5 onwards below.
    */
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* The packed-vector immediates add the per-pixel offset within each
    * subspan to its origin.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      /* Allocate the two deltas as one vec2 so they sit in adjacent
       * registers, which generate_linterp() checks for before using PLN.
       */
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs.
*/ 2022void 2023fs_visitor::emit_interpolation_setup_gen6() 2024{ 2025 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 2026 2027 /* If the pixel centers end up used, the setup is the same as for gen4. */ 2028 this->current_annotation = "compute pixel centers"; 2029 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); 2030 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); 2031 int_pixel_x.type = BRW_REGISTER_TYPE_UW; 2032 int_pixel_y.type = BRW_REGISTER_TYPE_UW; 2033 emit(fs_inst(BRW_OPCODE_ADD, 2034 int_pixel_x, 2035 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 2036 fs_reg(brw_imm_v(0x10101010)))); 2037 emit(fs_inst(BRW_OPCODE_ADD, 2038 int_pixel_y, 2039 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 2040 fs_reg(brw_imm_v(0x11001100)))); 2041 2042 /* As of gen6, we can no longer mix float and int sources. We have 2043 * to turn the integer pixel centers into floats for their actual 2044 * use. 2045 */ 2046 this->pixel_x = fs_reg(this, glsl_type::float_type); 2047 this->pixel_y = fs_reg(this, glsl_type::float_type); 2048 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x)); 2049 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); 2050 2051 this->current_annotation = "compute 1/pos.w"; 2052 this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); 2053 this->pixel_w = fs_reg(this, glsl_type::float_type); 2054 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 2055 2056 this->delta_x = fs_reg(brw_vec8_grf(2, 0)); 2057 this->delta_y = fs_reg(brw_vec8_grf(3, 0)); 2058 2059 this->current_annotation = NULL; 2060} 2061 2062void 2063fs_visitor::emit_fb_writes() 2064{ 2065 this->current_annotation = "FB write header"; 2066 GLboolean header_present = GL_TRUE; 2067 int nr = 0; 2068 2069 if (intel->gen >= 6 && 2070 !this->kill_emitted && 2071 c->key.nr_color_regions == 1) { 2072 header_present = false; 2073 } 2074 2075 if (header_present) { 2076 /* m0, m1 header */ 2077 nr += 2; 2078 } 2079 2080 if (c->aa_dest_stencil_reg) { 2081 
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2082 fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); 2083 } 2084 2085 /* Reserve space for color. It'll be filled in per MRT below. */ 2086 int color_mrf = nr; 2087 nr += 4; 2088 2089 if (c->source_depth_to_render_target) { 2090 if (c->computes_depth) { 2091 /* Hand over gl_FragDepth. */ 2092 assert(this->frag_depth); 2093 fs_reg depth = *(variable_storage(this->frag_depth)); 2094 2095 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 2096 } else { 2097 /* Pass through the payload depth. */ 2098 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2099 fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); 2100 } 2101 } 2102 2103 if (c->dest_depth_reg) { 2104 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2105 fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); 2106 } 2107 2108 fs_reg color = reg_undef; 2109 if (this->frag_color) 2110 color = *(variable_storage(this->frag_color)); 2111 else if (this->frag_data) { 2112 color = *(variable_storage(this->frag_data)); 2113 color.type = BRW_REGISTER_TYPE_F; 2114 } 2115 2116 for (int target = 0; target < c->key.nr_color_regions; target++) { 2117 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2118 "FB write target %d", 2119 target); 2120 if (this->frag_color || this->frag_data) { 2121 for (int i = 0; i < 4; i++) { 2122 emit(fs_inst(BRW_OPCODE_MOV, 2123 fs_reg(MRF, color_mrf + i), 2124 color)); 2125 color.reg_offset++; 2126 } 2127 } 2128 2129 if (this->frag_color) 2130 color.reg_offset -= 4; 2131 2132 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 2133 reg_undef, reg_undef)); 2134 inst->target = target; 2135 inst->base_mrf = 0; 2136 inst->mlen = nr; 2137 if (target == c->key.nr_color_regions - 1) 2138 inst->eot = true; 2139 inst->header_present = header_present; 2140 } 2141 2142 if (c->key.nr_color_regions == 0) { 2143 if (c->key.alpha_test && (this->frag_color || this->frag_data)) { 2144 /* If the alpha test is enabled but there's no color buffer, 2145 * we still need 
to send alpha out the pipeline to our null 2146 * renderbuffer. 2147 */ 2148 color.reg_offset += 3; 2149 emit(fs_inst(BRW_OPCODE_MOV, 2150 fs_reg(MRF, color_mrf + 3), 2151 color)); 2152 } 2153 2154 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 2155 reg_undef, reg_undef)); 2156 inst->base_mrf = 0; 2157 inst->mlen = nr; 2158 inst->eot = true; 2159 inst->header_present = header_present; 2160 } 2161 2162 this->current_annotation = NULL; 2163} 2164 2165void 2166fs_visitor::generate_fb_write(fs_inst *inst) 2167{ 2168 GLboolean eot = inst->eot; 2169 struct brw_reg implied_header; 2170 2171 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 2172 * move, here's g1. 2173 */ 2174 brw_push_insn_state(p); 2175 brw_set_mask_control(p, BRW_MASK_DISABLE); 2176 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 2177 2178 if (inst->header_present) { 2179 if (intel->gen >= 6) { 2180 brw_MOV(p, 2181 brw_message_reg(inst->base_mrf), 2182 brw_vec8_grf(0, 0)); 2183 2184 if (inst->target > 0) { 2185 /* Set the render target index for choosing BLEND_STATE. */ 2186 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), 2187 BRW_REGISTER_TYPE_UD), 2188 brw_imm_ud(inst->target)); 2189 } 2190 2191 /* Clear viewport index, render target array index. 
*/ 2192 brw_AND(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 0), 2193 BRW_REGISTER_TYPE_UD), 2194 retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), 2195 brw_imm_ud(0xf7ff)); 2196 2197 implied_header = brw_null_reg(); 2198 } else { 2199 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); 2200 } 2201 2202 brw_MOV(p, 2203 brw_message_reg(inst->base_mrf + 1), 2204 brw_vec8_grf(1, 0)); 2205 } else { 2206 implied_header = brw_null_reg(); 2207 } 2208 2209 brw_pop_insn_state(p); 2210 2211 brw_fb_WRITE(p, 2212 8, /* dispatch_width */ 2213 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 2214 inst->base_mrf, 2215 implied_header, 2216 inst->target, 2217 inst->mlen, 2218 0, 2219 eot, 2220 inst->header_present); 2221} 2222 2223void 2224fs_visitor::generate_linterp(fs_inst *inst, 2225 struct brw_reg dst, struct brw_reg *src) 2226{ 2227 struct brw_reg delta_x = src[0]; 2228 struct brw_reg delta_y = src[1]; 2229 struct brw_reg interp = src[2]; 2230 2231 if (brw->has_pln && 2232 delta_y.nr == delta_x.nr + 1 && 2233 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { 2234 brw_PLN(p, dst, interp, delta_x); 2235 } else { 2236 brw_LINE(p, brw_null_reg(), interp, delta_x); 2237 brw_MAC(p, dst, suboffset(interp, 1), delta_y); 2238 } 2239} 2240 2241void 2242fs_visitor::generate_math(fs_inst *inst, 2243 struct brw_reg dst, struct brw_reg *src) 2244{ 2245 int op; 2246 2247 switch (inst->opcode) { 2248 case FS_OPCODE_RCP: 2249 op = BRW_MATH_FUNCTION_INV; 2250 break; 2251 case FS_OPCODE_RSQ: 2252 op = BRW_MATH_FUNCTION_RSQ; 2253 break; 2254 case FS_OPCODE_SQRT: 2255 op = BRW_MATH_FUNCTION_SQRT; 2256 break; 2257 case FS_OPCODE_EXP2: 2258 op = BRW_MATH_FUNCTION_EXP; 2259 break; 2260 case FS_OPCODE_LOG2: 2261 op = BRW_MATH_FUNCTION_LOG; 2262 break; 2263 case FS_OPCODE_POW: 2264 op = BRW_MATH_FUNCTION_POW; 2265 break; 2266 case FS_OPCODE_SIN: 2267 op = BRW_MATH_FUNCTION_SIN; 2268 break; 2269 case FS_OPCODE_COS: 2270 op = BRW_MATH_FUNCTION_COS; 2271 break; 2272 default: 2273 
assert(!"not reached: unknown math function"); 2274 op = 0; 2275 break; 2276 } 2277 2278 if (intel->gen >= 6) { 2279 assert(inst->mlen == 0); 2280 2281 if (inst->opcode == FS_OPCODE_POW) { 2282 brw_math2(p, dst, op, src[0], src[1]); 2283 } else { 2284 brw_math(p, dst, 2285 op, 2286 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 2287 BRW_MATH_SATURATE_NONE, 2288 0, src[0], 2289 BRW_MATH_DATA_VECTOR, 2290 BRW_MATH_PRECISION_FULL); 2291 } 2292 } else { 2293 assert(inst->mlen >= 1); 2294 2295 brw_math(p, dst, 2296 op, 2297 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 2298 BRW_MATH_SATURATE_NONE, 2299 inst->base_mrf, src[0], 2300 BRW_MATH_DATA_VECTOR, 2301 BRW_MATH_PRECISION_FULL); 2302 } 2303} 2304 2305void 2306fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2307{ 2308 int msg_type = -1; 2309 int rlen = 4; 2310 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; 2311 2312 if (intel->gen >= 5) { 2313 switch (inst->opcode) { 2314 case FS_OPCODE_TEX: 2315 if (inst->shadow_compare) { 2316 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE; 2317 } else { 2318 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE; 2319 } 2320 break; 2321 case FS_OPCODE_TXB: 2322 if (inst->shadow_compare) { 2323 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE; 2324 } else { 2325 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; 2326 } 2327 break; 2328 case FS_OPCODE_TXL: 2329 if (inst->shadow_compare) { 2330 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; 2331 } else { 2332 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; 2333 } 2334 break; 2335 case FS_OPCODE_TXD: 2336 assert(!"TXD isn't supported on gen5+ yet."); 2337 break; 2338 } 2339 } else { 2340 switch (inst->opcode) { 2341 case FS_OPCODE_TEX: 2342 /* Note that G45 and older determines shadow compare and dispatch width 2343 * from message length for most messages. 
2344 */ 2345 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 2346 if (inst->shadow_compare) { 2347 assert(inst->mlen == 6); 2348 } else { 2349 assert(inst->mlen <= 4); 2350 } 2351 break; 2352 case FS_OPCODE_TXB: 2353 if (inst->shadow_compare) { 2354 assert(inst->mlen == 6); 2355 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; 2356 } else { 2357 assert(inst->mlen == 9); 2358 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 2359 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; 2360 } 2361 break; 2362 case FS_OPCODE_TXL: 2363 if (inst->shadow_compare) { 2364 assert(inst->mlen == 6); 2365 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; 2366 } else { 2367 assert(inst->mlen == 9); 2368 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD; 2369 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; 2370 } 2371 break; 2372 case FS_OPCODE_TXD: 2373 assert(!"TXD isn't supported on gen4 yet."); 2374 break; 2375 } 2376 } 2377 assert(msg_type != -1); 2378 2379 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { 2380 rlen = 8; 2381 dst = vec16(dst); 2382 } 2383 2384 brw_SAMPLE(p, 2385 retype(dst, BRW_REGISTER_TYPE_UW), 2386 inst->base_mrf, 2387 src, 2388 SURF_INDEX_TEXTURE(inst->sampler), 2389 inst->sampler, 2390 WRITEMASK_XYZW, 2391 msg_type, 2392 rlen, 2393 inst->mlen, 2394 0, 2395 1, 2396 simd_mode); 2397} 2398 2399 2400/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 2401 * looking like: 2402 * 2403 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 2404 * 2405 * and we're trying to produce: 2406 * 2407 * DDX DDY 2408 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 2409 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 2410 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 2411 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 2412 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 2413 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 2414 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 2415 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 2416 * 2417 * and add another set of two more subspans if in 16-pixel dispatch mode. 
2418 * 2419 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 2420 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 2421 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 2422 * between each other. We could probably do it like ddx and swizzle the right 2423 * order later, but bail for now and just produce 2424 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) 2425 */ 2426void 2427fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2428{ 2429 struct brw_reg src0 = brw_reg(src.file, src.nr, 1, 2430 BRW_REGISTER_TYPE_F, 2431 BRW_VERTICAL_STRIDE_2, 2432 BRW_WIDTH_2, 2433 BRW_HORIZONTAL_STRIDE_0, 2434 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2435 struct brw_reg src1 = brw_reg(src.file, src.nr, 0, 2436 BRW_REGISTER_TYPE_F, 2437 BRW_VERTICAL_STRIDE_2, 2438 BRW_WIDTH_2, 2439 BRW_HORIZONTAL_STRIDE_0, 2440 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2441 brw_ADD(p, dst, src0, negate(src1)); 2442} 2443 2444void 2445fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2446{ 2447 struct brw_reg src0 = brw_reg(src.file, src.nr, 0, 2448 BRW_REGISTER_TYPE_F, 2449 BRW_VERTICAL_STRIDE_4, 2450 BRW_WIDTH_4, 2451 BRW_HORIZONTAL_STRIDE_0, 2452 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2453 struct brw_reg src1 = brw_reg(src.file, src.nr, 2, 2454 BRW_REGISTER_TYPE_F, 2455 BRW_VERTICAL_STRIDE_4, 2456 BRW_WIDTH_4, 2457 BRW_HORIZONTAL_STRIDE_0, 2458 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2459 brw_ADD(p, dst, src0, negate(src1)); 2460} 2461 2462void 2463fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask) 2464{ 2465 if (intel->gen >= 6) { 2466 /* Gen6 no longer has the mask reg for us to just read the 2467 * active channels from. However, cmp updates just the channels 2468 * of the flag reg that are enabled, so we can get at the 2469 * channel enables that way. In this step, make a reg of ones 2470 * we'll compare to. 
2471 */ 2472 brw_MOV(p, mask, brw_imm_ud(1)); 2473 } else { 2474 brw_push_insn_state(p); 2475 brw_set_mask_control(p, BRW_MASK_DISABLE); 2476 brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */ 2477 brw_pop_insn_state(p); 2478 } 2479} 2480 2481void 2482fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask) 2483{ 2484 if (intel->gen >= 6) { 2485 struct brw_reg f0 = brw_flag_reg(); 2486 struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); 2487 2488 brw_push_insn_state(p); 2489 brw_set_mask_control(p, BRW_MASK_DISABLE); 2490 brw_MOV(p, f0, brw_imm_uw(0xffff)); /* inactive channels undiscarded */ 2491 brw_pop_insn_state(p); 2492 2493 brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 2494 BRW_CONDITIONAL_Z, mask, brw_imm_ud(0)); /* active channels fail test */ 2495 /* Undo CMP's whacking of predication*/ 2496 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2497 2498 brw_push_insn_state(p); 2499 brw_set_mask_control(p, BRW_MASK_DISABLE); 2500 brw_AND(p, g1, f0, g1); 2501 brw_pop_insn_state(p); 2502 } else { 2503 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); 2504 2505 mask = brw_uw1_reg(mask.file, mask.nr, 0); 2506 2507 brw_push_insn_state(p); 2508 brw_set_mask_control(p, BRW_MASK_DISABLE); 2509 brw_AND(p, g0, mask, g0); 2510 brw_pop_insn_state(p); 2511 } 2512} 2513 2514void 2515fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src) 2516{ 2517 assert(inst->mlen != 0); 2518 2519 brw_MOV(p, 2520 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD), 2521 retype(src, BRW_REGISTER_TYPE_UD)); 2522 brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1, 2523 inst->offset); 2524} 2525 2526void 2527fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst) 2528{ 2529 assert(inst->mlen != 0); 2530 2531 /* Clear any post destination dependencies that would be ignored by 2532 * the block read. See the B-Spec for pre-gen5 send instruction. 
2533 * 2534 * This could use a better solution, since texture sampling and 2535 * math reads could potentially run into it as well -- anywhere 2536 * that we have a SEND with a destination that is a register that 2537 * was written but not read within the last N instructions (what's 2538 * N? unsure). This is rare because of dead code elimination, but 2539 * not impossible. 2540 */ 2541 if (intel->gen == 4 && !intel->is_g4x) 2542 brw_MOV(p, brw_null_reg(), dst); 2543 2544 brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1, 2545 inst->offset); 2546 2547 if (intel->gen == 4 && !intel->is_g4x) { 2548 /* gen4 errata: destination from a send can't be used as a 2549 * destination until it's been read. Just read it so we don't 2550 * have to worry. 2551 */ 2552 brw_MOV(p, brw_null_reg(), dst); 2553 } 2554} 2555 2556 2557void 2558fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) 2559{ 2560 assert(inst->mlen != 0); 2561 2562 /* Clear any post destination dependencies that would be ignored by 2563 * the block read. See the B-Spec for pre-gen5 send instruction. 2564 * 2565 * This could use a better solution, since texture sampling and 2566 * math reads could potentially run into it as well -- anywhere 2567 * that we have a SEND with a destination that is a register that 2568 * was written but not read within the last N instructions (what's 2569 * N? unsure). This is rare because of dead code elimination, but 2570 * not impossible. 2571 */ 2572 if (intel->gen == 4 && !intel->is_g4x) 2573 brw_MOV(p, brw_null_reg(), dst); 2574 2575 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), 2576 inst->offset, SURF_INDEX_FRAG_CONST_BUFFER); 2577 2578 if (intel->gen == 4 && !intel->is_g4x) { 2579 /* gen4 errata: destination from a send can't be used as a 2580 * destination until it's been read. Just read it so we don't 2581 * have to worry. 
2582 */ 2583 brw_MOV(p, brw_null_reg(), dst); 2584 } 2585} 2586 2587/** 2588 * To be called after the last _mesa_add_state_reference() call, to 2589 * set up prog_data.param[] for assign_curb_setup() and 2590 * setup_pull_constants(). 2591 */ 2592void 2593fs_visitor::setup_paramvalues_refs() 2594{ 2595 /* Set up the pointers to ParamValues now that that array is finalized. */ 2596 for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { 2597 c->prog_data.param[i] = 2598 fp->Base.Parameters->ParameterValues[this->param_index[i]] + 2599 this->param_offset[i]; 2600 } 2601} 2602 2603void 2604fs_visitor::assign_curb_setup() 2605{ 2606 c->prog_data.first_curbe_grf = c->nr_payload_regs; 2607 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 2608 2609 /* Map the offsets in the UNIFORM file to fixed HW regs. */ 2610 foreach_iter(exec_list_iterator, iter, this->instructions) { 2611 fs_inst *inst = (fs_inst *)iter.get(); 2612 2613 for (unsigned int i = 0; i < 3; i++) { 2614 if (inst->src[i].file == UNIFORM) { 2615 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 2616 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + 2617 constant_nr / 8, 2618 constant_nr % 8); 2619 2620 inst->src[i].file = FIXED_HW_REG; 2621 inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type); 2622 } 2623 } 2624 } 2625} 2626 2627void 2628fs_visitor::calculate_urb_setup() 2629{ 2630 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2631 urb_setup[i] = -1; 2632 } 2633 2634 int urb_next = 0; 2635 /* Figure out where each of the incoming setup attributes lands. */ 2636 if (intel->gen >= 6) { 2637 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2638 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) { 2639 urb_setup[i] = urb_next++; 2640 } 2641 } 2642 } else { 2643 /* FINISHME: The sf doesn't map VS->FS inputs for us very well. 
*/ 2644 for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) { 2645 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { 2646 int fp_index; 2647 2648 if (i >= VERT_RESULT_VAR0) 2649 fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); 2650 else if (i <= VERT_RESULT_TEX7) 2651 fp_index = i; 2652 else 2653 fp_index = -1; 2654 2655 if (fp_index >= 0) 2656 urb_setup[fp_index] = urb_next++; 2657 } 2658 } 2659 } 2660 2661 /* Each attribute is 4 setup channels, each of which is half a reg. */ 2662 c->prog_data.urb_read_length = urb_next * 2; 2663} 2664 2665void 2666fs_visitor::assign_urb_setup() 2667{ 2668 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; 2669 2670 /* Offset all the urb_setup[] index by the actual position of the 2671 * setup regs, now that the location of the constants has been chosen. 2672 */ 2673 foreach_iter(exec_list_iterator, iter, this->instructions) { 2674 fs_inst *inst = (fs_inst *)iter.get(); 2675 2676 if (inst->opcode == FS_OPCODE_LINTERP) { 2677 assert(inst->src[2].file == FIXED_HW_REG); 2678 inst->src[2].fixed_hw_reg.nr += urb_start; 2679 } 2680 2681 if (inst->opcode == FS_OPCODE_CINTERP) { 2682 assert(inst->src[0].file == FIXED_HW_REG); 2683 inst->src[0].fixed_hw_reg.nr += urb_start; 2684 } 2685 } 2686 2687 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 2688} 2689 2690/** 2691 * Split large virtual GRFs into separate components if we can. 2692 * 2693 * This is mostly duplicated with what brw_fs_vector_splitting does, 2694 * but that's really conservative because it's afraid of doing 2695 * splitting that doesn't result in real progress after the rest of 2696 * the optimization phases, which would cause infinite looping in 2697 * optimization. We can do it once here, safely. This also has the 2698 * opportunity to split interpolated values, or maybe even uniforms, 2699 * which we don't have at the IR level. 
2700 * 2701 * We want to split, because virtual GRFs are what we register 2702 * allocate and spill (due to contiguousness requirements for some 2703 * instructions), and they're what we naturally generate in the 2704 * codegen process, but most virtual GRFs don't actually need to be 2705 * contiguous sets of GRFs. If we split, we'll end up with reduced 2706 * live intervals and better dead code elimination and coalescing. 2707 */ 2708void 2709fs_visitor::split_virtual_grfs() 2710{ 2711 int num_vars = this->virtual_grf_next; 2712 bool split_grf[num_vars]; 2713 int new_virtual_grf[num_vars]; 2714 2715 /* Try to split anything > 0 sized. */ 2716 for (int i = 0; i < num_vars; i++) { 2717 if (this->virtual_grf_sizes[i] != 1) 2718 split_grf[i] = true; 2719 else 2720 split_grf[i] = false; 2721 } 2722 2723 if (brw->has_pln) { 2724 /* PLN opcodes rely on the delta_xy being contiguous. */ 2725 split_grf[this->delta_x.reg] = false; 2726 } 2727 2728 foreach_iter(exec_list_iterator, iter, this->instructions) { 2729 fs_inst *inst = (fs_inst *)iter.get(); 2730 2731 /* Texturing produces 4 contiguous registers, so no splitting. */ 2732 if (inst->is_tex()) { 2733 split_grf[inst->dst.reg] = false; 2734 } 2735 } 2736 2737 /* Allocate new space for split regs. Note that the virtual 2738 * numbers will be contiguous. 
2739 */ 2740 for (int i = 0; i < num_vars; i++) { 2741 if (split_grf[i]) { 2742 new_virtual_grf[i] = virtual_grf_alloc(1); 2743 for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { 2744 int reg = virtual_grf_alloc(1); 2745 assert(reg == new_virtual_grf[i] + j - 1); 2746 (void) reg; 2747 } 2748 this->virtual_grf_sizes[i] = 1; 2749 } 2750 } 2751 2752 foreach_iter(exec_list_iterator, iter, this->instructions) { 2753 fs_inst *inst = (fs_inst *)iter.get(); 2754 2755 if (inst->dst.file == GRF && 2756 split_grf[inst->dst.reg] && 2757 inst->dst.reg_offset != 0) { 2758 inst->dst.reg = (new_virtual_grf[inst->dst.reg] + 2759 inst->dst.reg_offset - 1); 2760 inst->dst.reg_offset = 0; 2761 } 2762 for (int i = 0; i < 3; i++) { 2763 if (inst->src[i].file == GRF && 2764 split_grf[inst->src[i].reg] && 2765 inst->src[i].reg_offset != 0) { 2766 inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] + 2767 inst->src[i].reg_offset - 1); 2768 inst->src[i].reg_offset = 0; 2769 } 2770 } 2771 } 2772 this->live_intervals_valid = false; 2773} 2774 2775/** 2776 * Choose accesses from the UNIFORM file to demote to using the pull 2777 * constant buffer. 2778 * 2779 * We allow a fragment shader to have more than the specified minimum 2780 * maximum number of fragment shader uniform components (64). If 2781 * there are too many of these, they'd fill up all of register space. 2782 * So, this will push some of them out to the pull constant buffer and 2783 * update the program to load them. 2784 */ 2785void 2786fs_visitor::setup_pull_constants() 2787{ 2788 /* Only allow 16 registers (128 uniform components) as push constants. */ 2789 unsigned int max_uniform_components = 16 * 8; 2790 if (c->prog_data.nr_params <= max_uniform_components) 2791 return; 2792 2793 /* Just demote the end of the list. We could probably do better 2794 * here, demoting things that are rarely used in the program first. 
2795 */ 2796 int pull_uniform_base = max_uniform_components; 2797 int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; 2798 2799 foreach_iter(exec_list_iterator, iter, this->instructions) { 2800 fs_inst *inst = (fs_inst *)iter.get(); 2801 2802 for (int i = 0; i < 3; i++) { 2803 if (inst->src[i].file != UNIFORM) 2804 continue; 2805 2806 int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 2807 if (uniform_nr < pull_uniform_base) 2808 continue; 2809 2810 fs_reg dst = fs_reg(this, glsl_type::float_type); 2811 fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD, 2812 dst); 2813 pull->offset = ((uniform_nr - pull_uniform_base) * 4) & ~15; 2814 pull->ir = inst->ir; 2815 pull->annotation = inst->annotation; 2816 pull->base_mrf = 14; 2817 pull->mlen = 1; 2818 2819 inst->insert_before(pull); 2820 2821 inst->src[i].file = GRF; 2822 inst->src[i].reg = dst.reg; 2823 inst->src[i].reg_offset = 0; 2824 inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3; 2825 } 2826 } 2827 2828 for (int i = 0; i < pull_uniform_count; i++) { 2829 c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i]; 2830 c->prog_data.pull_param_convert[i] = 2831 c->prog_data.param_convert[pull_uniform_base + i]; 2832 } 2833 c->prog_data.nr_params -= pull_uniform_count; 2834 c->prog_data.nr_pull_params = pull_uniform_count; 2835} 2836 2837void 2838fs_visitor::calculate_live_intervals() 2839{ 2840 int num_vars = this->virtual_grf_next; 2841 int *def = ralloc_array(mem_ctx, int, num_vars); 2842 int *use = ralloc_array(mem_ctx, int, num_vars); 2843 int loop_depth = 0; 2844 int loop_start = 0; 2845 int bb_header_ip = 0; 2846 2847 if (this->live_intervals_valid) 2848 return; 2849 2850 for (int i = 0; i < num_vars; i++) { 2851 def[i] = MAX_INSTRUCTION; 2852 use[i] = -1; 2853 } 2854 2855 int ip = 0; 2856 foreach_iter(exec_list_iterator, iter, this->instructions) { 2857 fs_inst *inst = (fs_inst *)iter.get(); 2858 2859 if (inst->opcode == BRW_OPCODE_DO) { 2860 
if (loop_depth++ == 0) 2861 loop_start = ip; 2862 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2863 loop_depth--; 2864 2865 if (loop_depth == 0) { 2866 /* Patches up the use of vars marked for being live across 2867 * the whole loop. 2868 */ 2869 for (int i = 0; i < num_vars; i++) { 2870 if (use[i] == loop_start) { 2871 use[i] = ip; 2872 } 2873 } 2874 } 2875 } else { 2876 for (unsigned int i = 0; i < 3; i++) { 2877 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2878 int reg = inst->src[i].reg; 2879 2880 if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && 2881 def[reg] >= bb_header_ip)) { 2882 use[reg] = ip; 2883 } else { 2884 def[reg] = MIN2(loop_start, def[reg]); 2885 use[reg] = loop_start; 2886 2887 /* Nobody else is going to go smash our start to 2888 * later in the loop now, because def[reg] now 2889 * points before the bb header. 2890 */ 2891 } 2892 } 2893 } 2894 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2895 int reg = inst->dst.reg; 2896 2897 if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && 2898 !inst->predicated)) { 2899 def[reg] = MIN2(def[reg], ip); 2900 } else { 2901 def[reg] = MIN2(def[reg], loop_start); 2902 } 2903 } 2904 } 2905 2906 ip++; 2907 2908 /* Set the basic block header IP. This is used for determining 2909 * if a complete def of single-register virtual GRF in a loop 2910 * dominates a use in the same basic block. It's a quick way to 2911 * reduce the live interval range of most register used in a 2912 * loop. 
2913 */ 2914 if (inst->opcode == BRW_OPCODE_IF || 2915 inst->opcode == BRW_OPCODE_ELSE || 2916 inst->opcode == BRW_OPCODE_ENDIF || 2917 inst->opcode == BRW_OPCODE_DO || 2918 inst->opcode == BRW_OPCODE_WHILE || 2919 inst->opcode == BRW_OPCODE_BREAK || 2920 inst->opcode == BRW_OPCODE_CONTINUE) { 2921 bb_header_ip = ip; 2922 } 2923 } 2924 2925 ralloc_free(this->virtual_grf_def); 2926 ralloc_free(this->virtual_grf_use); 2927 this->virtual_grf_def = def; 2928 this->virtual_grf_use = use; 2929 2930 this->live_intervals_valid = true; 2931} 2932 2933/** 2934 * Attempts to move immediate constants into the immediate 2935 * constant slot of following instructions. 2936 * 2937 * Immediate constants are a bit tricky -- they have to be in the last 2938 * operand slot, you can't do abs/negate on them, 2939 */ 2940 2941bool 2942fs_visitor::propagate_constants() 2943{ 2944 bool progress = false; 2945 2946 calculate_live_intervals(); 2947 2948 foreach_iter(exec_list_iterator, iter, this->instructions) { 2949 fs_inst *inst = (fs_inst *)iter.get(); 2950 2951 if (inst->opcode != BRW_OPCODE_MOV || 2952 inst->predicated || 2953 inst->dst.file != GRF || inst->src[0].file != IMM || 2954 inst->dst.type != inst->src[0].type) 2955 continue; 2956 2957 /* Don't bother with cases where we should have had the 2958 * operation on the constant folded in GLSL already. 2959 */ 2960 if (inst->saturate) 2961 continue; 2962 2963 /* Found a move of a constant to a GRF. Find anything else using the GRF 2964 * before it's written, and replace it with the constant if we can. 
2965 */ 2966 exec_list_iterator scan_iter = iter; 2967 scan_iter.next(); 2968 for (; scan_iter.has_next(); scan_iter.next()) { 2969 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2970 2971 if (scan_inst->opcode == BRW_OPCODE_DO || 2972 scan_inst->opcode == BRW_OPCODE_WHILE || 2973 scan_inst->opcode == BRW_OPCODE_ELSE || 2974 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2975 break; 2976 } 2977 2978 for (int i = 2; i >= 0; i--) { 2979 if (scan_inst->src[i].file != GRF || 2980 scan_inst->src[i].reg != inst->dst.reg || 2981 scan_inst->src[i].reg_offset != inst->dst.reg_offset) 2982 continue; 2983 2984 /* Don't bother with cases where we should have had the 2985 * operation on the constant folded in GLSL already. 2986 */ 2987 if (scan_inst->src[i].negate || scan_inst->src[i].abs) 2988 continue; 2989 2990 switch (scan_inst->opcode) { 2991 case BRW_OPCODE_MOV: 2992 scan_inst->src[i] = inst->src[0]; 2993 progress = true; 2994 break; 2995 2996 case BRW_OPCODE_MUL: 2997 case BRW_OPCODE_ADD: 2998 if (i == 1) { 2999 scan_inst->src[i] = inst->src[0]; 3000 progress = true; 3001 } else if (i == 0 && scan_inst->src[1].file != IMM) { 3002 /* Fit this constant in by commuting the operands */ 3003 scan_inst->src[0] = scan_inst->src[1]; 3004 scan_inst->src[1] = inst->src[0]; 3005 progress = true; 3006 } 3007 break; 3008 case BRW_OPCODE_CMP: 3009 case BRW_OPCODE_SEL: 3010 if (i == 1) { 3011 scan_inst->src[i] = inst->src[0]; 3012 progress = true; 3013 } 3014 } 3015 } 3016 3017 if (scan_inst->dst.file == GRF && 3018 scan_inst->dst.reg == inst->dst.reg && 3019 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 3020 scan_inst->is_tex())) { 3021 break; 3022 } 3023 } 3024 } 3025 3026 if (progress) 3027 this->live_intervals_valid = false; 3028 3029 return progress; 3030} 3031/** 3032 * Must be called after calculate_live_intervales() to remove unused 3033 * writes to registers -- register allocation will fail otherwise 3034 * because something deffed but not used won't be considered to 3035 
* interfere with other regs. 3036 */ 3037bool 3038fs_visitor::dead_code_eliminate() 3039{ 3040 bool progress = false; 3041 int pc = 0; 3042 3043 calculate_live_intervals(); 3044 3045 foreach_iter(exec_list_iterator, iter, this->instructions) { 3046 fs_inst *inst = (fs_inst *)iter.get(); 3047 3048 if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { 3049 inst->remove(); 3050 progress = true; 3051 } 3052 3053 pc++; 3054 } 3055 3056 if (progress) 3057 live_intervals_valid = false; 3058 3059 return progress; 3060} 3061 3062bool 3063fs_visitor::register_coalesce() 3064{ 3065 bool progress = false; 3066 int if_depth = 0; 3067 int loop_depth = 0; 3068 3069 foreach_iter(exec_list_iterator, iter, this->instructions) { 3070 fs_inst *inst = (fs_inst *)iter.get(); 3071 3072 /* Make sure that we dominate the instructions we're going to 3073 * scan for interfering with our coalescing, or we won't have 3074 * scanned enough to see if anything interferes with our 3075 * coalescing. We don't dominate the following instructions if 3076 * we're in a loop or an if block. 3077 */ 3078 switch (inst->opcode) { 3079 case BRW_OPCODE_DO: 3080 loop_depth++; 3081 break; 3082 case BRW_OPCODE_WHILE: 3083 loop_depth--; 3084 break; 3085 case BRW_OPCODE_IF: 3086 if_depth++; 3087 break; 3088 case BRW_OPCODE_ENDIF: 3089 if_depth--; 3090 break; 3091 } 3092 if (loop_depth || if_depth) 3093 continue; 3094 3095 if (inst->opcode != BRW_OPCODE_MOV || 3096 inst->predicated || 3097 inst->saturate || 3098 inst->dst.file != GRF || inst->src[0].file != GRF || 3099 inst->dst.type != inst->src[0].type) 3100 continue; 3101 3102 bool has_source_modifiers = inst->src[0].abs || inst->src[0].negate; 3103 3104 /* Found a move of a GRF to a GRF. Let's see if we can coalesce 3105 * them: check for no writes to either one until the exit of the 3106 * program. 
3107 */ 3108 bool interfered = false; 3109 exec_list_iterator scan_iter = iter; 3110 scan_iter.next(); 3111 for (; scan_iter.has_next(); scan_iter.next()) { 3112 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 3113 3114 if (scan_inst->dst.file == GRF) { 3115 if (scan_inst->dst.reg == inst->dst.reg && 3116 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 3117 scan_inst->is_tex())) { 3118 interfered = true; 3119 break; 3120 } 3121 if (scan_inst->dst.reg == inst->src[0].reg && 3122 (scan_inst->dst.reg_offset == inst->src[0].reg_offset || 3123 scan_inst->is_tex())) { 3124 interfered = true; 3125 break; 3126 } 3127 } 3128 3129 /* The gen6 MATH instruction can't handle source modifiers, so avoid 3130 * coalescing those for now. We should do something more specific. 3131 */ 3132 if (intel->gen == 6 && scan_inst->is_math() && has_source_modifiers) { 3133 interfered = true; 3134 break; 3135 } 3136 } 3137 if (interfered) { 3138 continue; 3139 } 3140 3141 /* Rewrite the later usage to point at the source of the move to 3142 * be removed. 
3143 */ 3144 for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); 3145 scan_iter.next()) { 3146 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 3147 3148 for (int i = 0; i < 3; i++) { 3149 if (scan_inst->src[i].file == GRF && 3150 scan_inst->src[i].reg == inst->dst.reg && 3151 scan_inst->src[i].reg_offset == inst->dst.reg_offset) { 3152 scan_inst->src[i].reg = inst->src[0].reg; 3153 scan_inst->src[i].reg_offset = inst->src[0].reg_offset; 3154 scan_inst->src[i].abs |= inst->src[0].abs; 3155 scan_inst->src[i].negate ^= inst->src[0].negate; 3156 scan_inst->src[i].smear = inst->src[0].smear; 3157 } 3158 } 3159 } 3160 3161 inst->remove(); 3162 progress = true; 3163 } 3164 3165 if (progress) 3166 live_intervals_valid = false; 3167 3168 return progress; 3169} 3170 3171 3172bool 3173fs_visitor::compute_to_mrf() 3174{ 3175 bool progress = false; 3176 int next_ip = 0; 3177 3178 calculate_live_intervals(); 3179 3180 foreach_iter(exec_list_iterator, iter, this->instructions) { 3181 fs_inst *inst = (fs_inst *)iter.get(); 3182 3183 int ip = next_ip; 3184 next_ip++; 3185 3186 if (inst->opcode != BRW_OPCODE_MOV || 3187 inst->predicated || 3188 inst->dst.file != MRF || inst->src[0].file != GRF || 3189 inst->dst.type != inst->src[0].type || 3190 inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1) 3191 continue; 3192 3193 /* Can't compute-to-MRF this GRF if someone else was going to 3194 * read it later. 3195 */ 3196 if (this->virtual_grf_use[inst->src[0].reg] > ip) 3197 continue; 3198 3199 /* Found a move of a GRF to a MRF. Let's see if we can go 3200 * rewrite the thing that made this GRF to write into the MRF. 3201 */ 3202 fs_inst *scan_inst; 3203 for (scan_inst = (fs_inst *)inst->prev; 3204 scan_inst->prev != NULL; 3205 scan_inst = (fs_inst *)scan_inst->prev) { 3206 if (scan_inst->dst.file == GRF && 3207 scan_inst->dst.reg == inst->src[0].reg) { 3208 /* Found the last thing to write our reg we want to turn 3209 * into a compute-to-MRF. 
3210 */ 3211 3212 if (scan_inst->is_tex()) { 3213 /* texturing writes several continuous regs, so we can't 3214 * compute-to-mrf that. 3215 */ 3216 break; 3217 } 3218 3219 /* If it's predicated, it (probably) didn't populate all 3220 * the channels. 3221 */ 3222 if (scan_inst->predicated) 3223 break; 3224 3225 /* SEND instructions can't have MRF as a destination. */ 3226 if (scan_inst->mlen) 3227 break; 3228 3229 if (intel->gen >= 6) { 3230 /* gen6 math instructions must have the destination be 3231 * GRF, so no compute-to-MRF for them. 3232 */ 3233 if (scan_inst->is_math()) { 3234 break; 3235 } 3236 } 3237 3238 if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { 3239 /* Found the creator of our MRF's source value. */ 3240 scan_inst->dst.file = MRF; 3241 scan_inst->dst.hw_reg = inst->dst.hw_reg; 3242 scan_inst->saturate |= inst->saturate; 3243 inst->remove(); 3244 progress = true; 3245 } 3246 break; 3247 } 3248 3249 /* We don't handle flow control here. Most computation of 3250 * values that end up in MRFs are shortly before the MRF 3251 * write anyway. 3252 */ 3253 if (scan_inst->opcode == BRW_OPCODE_DO || 3254 scan_inst->opcode == BRW_OPCODE_WHILE || 3255 scan_inst->opcode == BRW_OPCODE_ELSE || 3256 scan_inst->opcode == BRW_OPCODE_ENDIF) { 3257 break; 3258 } 3259 3260 /* You can't read from an MRF, so if someone else reads our 3261 * MRF's source GRF that we wanted to rewrite, that stops us. 3262 */ 3263 bool interfered = false; 3264 for (int i = 0; i < 3; i++) { 3265 if (scan_inst->src[i].file == GRF && 3266 scan_inst->src[i].reg == inst->src[0].reg && 3267 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) { 3268 interfered = true; 3269 } 3270 } 3271 if (interfered) 3272 break; 3273 3274 if (scan_inst->dst.file == MRF && 3275 scan_inst->dst.hw_reg == inst->dst.hw_reg) { 3276 /* Somebody else wrote our MRF here, so we can't can't 3277 * compute-to-MRF before that. 
3278 */ 3279 break; 3280 } 3281 3282 if (scan_inst->mlen > 0) { 3283 /* Found a SEND instruction, which means that there are 3284 * live values in MRFs from base_mrf to base_mrf + 3285 * scan_inst->mlen - 1. Don't go pushing our MRF write up 3286 * above it. 3287 */ 3288 if (inst->dst.hw_reg >= scan_inst->base_mrf && 3289 inst->dst.hw_reg < scan_inst->base_mrf + scan_inst->mlen) { 3290 break; 3291 } 3292 } 3293 } 3294 } 3295 3296 return progress; 3297} 3298 3299/** 3300 * Walks through basic blocks, locking for repeated MRF writes and 3301 * removing the later ones. 3302 */ 3303bool 3304fs_visitor::remove_duplicate_mrf_writes() 3305{ 3306 fs_inst *last_mrf_move[16]; 3307 bool progress = false; 3308 3309 memset(last_mrf_move, 0, sizeof(last_mrf_move)); 3310 3311 foreach_iter(exec_list_iterator, iter, this->instructions) { 3312 fs_inst *inst = (fs_inst *)iter.get(); 3313 3314 switch (inst->opcode) { 3315 case BRW_OPCODE_DO: 3316 case BRW_OPCODE_WHILE: 3317 case BRW_OPCODE_IF: 3318 case BRW_OPCODE_ELSE: 3319 case BRW_OPCODE_ENDIF: 3320 memset(last_mrf_move, 0, sizeof(last_mrf_move)); 3321 continue; 3322 default: 3323 break; 3324 } 3325 3326 if (inst->opcode == BRW_OPCODE_MOV && 3327 inst->dst.file == MRF) { 3328 fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; 3329 if (prev_inst && inst->equals(prev_inst)) { 3330 inst->remove(); 3331 progress = true; 3332 continue; 3333 } 3334 } 3335 3336 /* Clear out the last-write records for MRFs that were overwritten. */ 3337 if (inst->dst.file == MRF) { 3338 last_mrf_move[inst->dst.hw_reg] = NULL; 3339 } 3340 3341 if (inst->mlen > 0) { 3342 /* Found a SEND instruction, which will include two or fewer 3343 * implied MRF writes. We could do better here. 3344 */ 3345 for (int i = 0; i < implied_mrf_writes(inst); i++) { 3346 last_mrf_move[inst->base_mrf + i] = NULL; 3347 } 3348 } 3349 3350 /* Clear out any MRF move records whose sources got overwritten. 
*/ 3351 if (inst->dst.file == GRF) { 3352 for (unsigned int i = 0; i < Elements(last_mrf_move); i++) { 3353 if (last_mrf_move[i] && 3354 last_mrf_move[i]->src[0].reg == inst->dst.reg) { 3355 last_mrf_move[i] = NULL; 3356 } 3357 } 3358 } 3359 3360 if (inst->opcode == BRW_OPCODE_MOV && 3361 inst->dst.file == MRF && 3362 inst->src[0].file == GRF && 3363 !inst->predicated) { 3364 last_mrf_move[inst->dst.hw_reg] = inst; 3365 } 3366 } 3367 3368 return progress; 3369} 3370 3371bool 3372fs_visitor::virtual_grf_interferes(int a, int b) 3373{ 3374 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); 3375 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); 3376 3377 /* We can't handle dead register writes here, without iterating 3378 * over the whole instruction stream to find every single dead 3379 * write to that register to compare to the live interval of the 3380 * other register. Just assert that dead_code_eliminate() has been 3381 * called. 3382 */ 3383 assert((this->virtual_grf_use[a] != -1 || 3384 this->virtual_grf_def[a] == MAX_INSTRUCTION) && 3385 (this->virtual_grf_use[b] != -1 || 3386 this->virtual_grf_def[b] == MAX_INSTRUCTION)); 3387 3388 return start < end; 3389} 3390 3391static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) 3392{ 3393 struct brw_reg brw_reg; 3394 3395 switch (reg->file) { 3396 case GRF: 3397 case ARF: 3398 case MRF: 3399 if (reg->smear == -1) { 3400 brw_reg = brw_vec8_reg(reg->file, 3401 reg->hw_reg, 0); 3402 } else { 3403 brw_reg = brw_vec1_reg(reg->file, 3404 reg->hw_reg, reg->smear); 3405 } 3406 brw_reg = retype(brw_reg, reg->type); 3407 break; 3408 case IMM: 3409 switch (reg->type) { 3410 case BRW_REGISTER_TYPE_F: 3411 brw_reg = brw_imm_f(reg->imm.f); 3412 break; 3413 case BRW_REGISTER_TYPE_D: 3414 brw_reg = brw_imm_d(reg->imm.i); 3415 break; 3416 case BRW_REGISTER_TYPE_UD: 3417 brw_reg = brw_imm_ud(reg->imm.u); 3418 break; 3419 default: 3420 assert(!"not reached"); 3421 brw_reg = brw_null_reg(); 3422 
break; 3423 } 3424 break; 3425 case FIXED_HW_REG: 3426 brw_reg = reg->fixed_hw_reg; 3427 break; 3428 case BAD_FILE: 3429 /* Probably unused. */ 3430 brw_reg = brw_null_reg(); 3431 break; 3432 case UNIFORM: 3433 assert(!"not reached"); 3434 brw_reg = brw_null_reg(); 3435 break; 3436 default: 3437 assert(!"not reached"); 3438 brw_reg = brw_null_reg(); 3439 break; 3440 } 3441 if (reg->abs) 3442 brw_reg = brw_abs(brw_reg); 3443 if (reg->negate) 3444 brw_reg = negate(brw_reg); 3445 3446 return brw_reg; 3447} 3448 3449void 3450fs_visitor::generate_code() 3451{ 3452 int last_native_inst = 0; 3453 const char *last_annotation_string = NULL; 3454 ir_instruction *last_annotation_ir = NULL; 3455 3456 int if_stack_array_size = 16; 3457 int loop_stack_array_size = 16; 3458 int if_stack_depth = 0, loop_stack_depth = 0; 3459 brw_instruction **if_stack = 3460 rzalloc_array(this->mem_ctx, brw_instruction *, if_stack_array_size); 3461 brw_instruction **loop_stack = 3462 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); 3463 int *if_depth_in_loop = 3464 rzalloc_array(this->mem_ctx, int, loop_stack_array_size); 3465 3466 3467 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3468 printf("Native code for fragment shader %d:\n", 3469 ctx->Shader.CurrentFragmentProgram->Name); 3470 } 3471 3472 foreach_iter(exec_list_iterator, iter, this->instructions) { 3473 fs_inst *inst = (fs_inst *)iter.get(); 3474 struct brw_reg src[3], dst; 3475 3476 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3477 if (last_annotation_ir != inst->ir) { 3478 last_annotation_ir = inst->ir; 3479 if (last_annotation_ir) { 3480 printf(" "); 3481 last_annotation_ir->print(); 3482 printf("\n"); 3483 } 3484 } 3485 if (last_annotation_string != inst->annotation) { 3486 last_annotation_string = inst->annotation; 3487 if (last_annotation_string) 3488 printf(" %s\n", last_annotation_string); 3489 } 3490 } 3491 3492 for (unsigned int i = 0; i < 3; i++) { 3493 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 3494 } 3495 dst = 
brw_reg_from_fs_reg(&inst->dst); 3496 3497 brw_set_conditionalmod(p, inst->conditional_mod); 3498 brw_set_predicate_control(p, inst->predicated); 3499 brw_set_saturate(p, inst->saturate); 3500 3501 switch (inst->opcode) { 3502 case BRW_OPCODE_MOV: 3503 brw_MOV(p, dst, src[0]); 3504 break; 3505 case BRW_OPCODE_ADD: 3506 brw_ADD(p, dst, src[0], src[1]); 3507 break; 3508 case BRW_OPCODE_MUL: 3509 brw_MUL(p, dst, src[0], src[1]); 3510 break; 3511 3512 case BRW_OPCODE_FRC: 3513 brw_FRC(p, dst, src[0]); 3514 break; 3515 case BRW_OPCODE_RNDD: 3516 brw_RNDD(p, dst, src[0]); 3517 break; 3518 case BRW_OPCODE_RNDE: 3519 brw_RNDE(p, dst, src[0]); 3520 break; 3521 case BRW_OPCODE_RNDZ: 3522 brw_RNDZ(p, dst, src[0]); 3523 break; 3524 3525 case BRW_OPCODE_AND: 3526 brw_AND(p, dst, src[0], src[1]); 3527 break; 3528 case BRW_OPCODE_OR: 3529 brw_OR(p, dst, src[0], src[1]); 3530 break; 3531 case BRW_OPCODE_XOR: 3532 brw_XOR(p, dst, src[0], src[1]); 3533 break; 3534 case BRW_OPCODE_NOT: 3535 brw_NOT(p, dst, src[0]); 3536 break; 3537 case BRW_OPCODE_ASR: 3538 brw_ASR(p, dst, src[0], src[1]); 3539 break; 3540 case BRW_OPCODE_SHR: 3541 brw_SHR(p, dst, src[0], src[1]); 3542 break; 3543 case BRW_OPCODE_SHL: 3544 brw_SHL(p, dst, src[0], src[1]); 3545 break; 3546 3547 case BRW_OPCODE_CMP: 3548 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 3549 break; 3550 case BRW_OPCODE_SEL: 3551 brw_SEL(p, dst, src[0], src[1]); 3552 break; 3553 3554 case BRW_OPCODE_IF: 3555 if (inst->src[0].file != BAD_FILE) { 3556 assert(intel->gen >= 6); 3557 if_stack[if_stack_depth] = gen6_IF(p, inst->conditional_mod, src[0], src[1]); 3558 } else { 3559 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 3560 } 3561 if_depth_in_loop[loop_stack_depth]++; 3562 if_stack_depth++; 3563 if (if_stack_array_size <= if_stack_depth) { 3564 if_stack_array_size *= 2; 3565 if_stack = reralloc(this->mem_ctx, if_stack, brw_instruction *, 3566 if_stack_array_size); 3567 } 3568 break; 3569 3570 case BRW_OPCODE_ELSE: 3571 
if_stack[if_stack_depth - 1] = 3572 brw_ELSE(p, if_stack[if_stack_depth - 1]); 3573 break; 3574 case BRW_OPCODE_ENDIF: 3575 if_stack_depth--; 3576 brw_ENDIF(p , if_stack[if_stack_depth]); 3577 if_depth_in_loop[loop_stack_depth]--; 3578 break; 3579 3580 case BRW_OPCODE_DO: 3581 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 3582 if (loop_stack_array_size <= loop_stack_depth) { 3583 loop_stack_array_size *= 2; 3584 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, 3585 loop_stack_array_size); 3586 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, 3587 loop_stack_array_size); 3588 } 3589 if_depth_in_loop[loop_stack_depth] = 0; 3590 break; 3591 3592 case BRW_OPCODE_BREAK: 3593 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 3594 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 3595 break; 3596 case BRW_OPCODE_CONTINUE: 3597 /* FINISHME: We need to write the loop instruction support still. */ 3598 if (intel->gen >= 6) 3599 gen6_CONT(p, loop_stack[loop_stack_depth - 1]); 3600 else 3601 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 3602 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 3603 break; 3604 3605 case BRW_OPCODE_WHILE: { 3606 struct brw_instruction *inst0, *inst1; 3607 GLuint br = 1; 3608 3609 if (intel->gen >= 5) 3610 br = 2; 3611 3612 assert(loop_stack_depth > 0); 3613 loop_stack_depth--; 3614 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 3615 if (intel->gen < 6) { 3616 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 3617 while (inst0 > loop_stack[loop_stack_depth]) { 3618 inst0--; 3619 if (inst0->header.opcode == BRW_OPCODE_BREAK && 3620 inst0->bits3.if_else.jump_count == 0) { 3621 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 3622 } 3623 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 3624 inst0->bits3.if_else.jump_count == 0) { 3625 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 3626 } 3627 } 3628 } 3629 } 3630 break; 3631 3632 case FS_OPCODE_RCP: 
3633 case FS_OPCODE_RSQ: 3634 case FS_OPCODE_SQRT: 3635 case FS_OPCODE_EXP2: 3636 case FS_OPCODE_LOG2: 3637 case FS_OPCODE_POW: 3638 case FS_OPCODE_SIN: 3639 case FS_OPCODE_COS: 3640 generate_math(inst, dst, src); 3641 break; 3642 case FS_OPCODE_CINTERP: 3643 brw_MOV(p, dst, src[0]); 3644 break; 3645 case FS_OPCODE_LINTERP: 3646 generate_linterp(inst, dst, src); 3647 break; 3648 case FS_OPCODE_TEX: 3649 case FS_OPCODE_TXB: 3650 case FS_OPCODE_TXD: 3651 case FS_OPCODE_TXL: 3652 generate_tex(inst, dst, src[0]); 3653 break; 3654 case FS_OPCODE_DISCARD_NOT: 3655 generate_discard_not(inst, dst); 3656 break; 3657 case FS_OPCODE_DISCARD_AND: 3658 generate_discard_and(inst, src[0]); 3659 break; 3660 case FS_OPCODE_DDX: 3661 generate_ddx(inst, dst, src[0]); 3662 break; 3663 case FS_OPCODE_DDY: 3664 generate_ddy(inst, dst, src[0]); 3665 break; 3666 3667 case FS_OPCODE_SPILL: 3668 generate_spill(inst, src[0]); 3669 break; 3670 3671 case FS_OPCODE_UNSPILL: 3672 generate_unspill(inst, dst); 3673 break; 3674 3675 case FS_OPCODE_PULL_CONSTANT_LOAD: 3676 generate_pull_constant_load(inst, dst); 3677 break; 3678 3679 case FS_OPCODE_FB_WRITE: 3680 generate_fb_write(inst); 3681 break; 3682 default: 3683 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 3684 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 3685 brw_opcodes[inst->opcode].name); 3686 } else { 3687 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 3688 } 3689 this->fail = true; 3690 } 3691 3692 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3693 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 3694 if (0) { 3695 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 3696 ((uint32_t *)&p->store[i])[3], 3697 ((uint32_t *)&p->store[i])[2], 3698 ((uint32_t *)&p->store[i])[1], 3699 ((uint32_t *)&p->store[i])[0]); 3700 } 3701 brw_disasm(stdout, &p->store[i], intel->gen); 3702 } 3703 } 3704 3705 last_native_inst = p->nr_insn; 3706 } 3707 3708 ralloc_free(if_stack); 3709 ralloc_free(loop_stack); 3710 
ralloc_free(if_depth_in_loop); 3711 3712 brw_set_uip_jip(p); 3713 3714 /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS 3715 * emit issues, it doesn't get the jump distances into the output, 3716 * which is often something we want to debug. So this is here in 3717 * case you're doing that. 3718 */ 3719 if (0) { 3720 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3721 for (unsigned int i = 0; i < p->nr_insn; i++) { 3722 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 3723 ((uint32_t *)&p->store[i])[3], 3724 ((uint32_t *)&p->store[i])[2], 3725 ((uint32_t *)&p->store[i])[1], 3726 ((uint32_t *)&p->store[i])[0]); 3727 brw_disasm(stdout, &p->store[i], intel->gen); 3728 } 3729 } 3730 } 3731} 3732 3733GLboolean 3734brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 3735{ 3736 struct intel_context *intel = &brw->intel; 3737 struct gl_context *ctx = &intel->ctx; 3738 struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; 3739 3740 if (!prog) 3741 return GL_FALSE; 3742 3743 struct brw_shader *shader = 3744 (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 3745 if (!shader) 3746 return GL_FALSE; 3747 3748 /* We always use 8-wide mode, at least for now. For one, flow 3749 * control only works in 8-wide. Also, when we're fragment shader 3750 * bound, we're almost always under register pressure as well, so 3751 * 8-wide would save us from the performance cliff of spilling 3752 * regs. 3753 */ 3754 c->dispatch_width = 8; 3755 3756 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3757 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 3758 _mesa_print_ir(shader->ir, NULL); 3759 printf("\n"); 3760 } 3761 3762 /* Now the main event: Visit the shader IR and generate our FS IR for it. 
3763 */ 3764 fs_visitor v(c, shader); 3765 3766 if (0) { 3767 v.emit_dummy_fs(); 3768 } else { 3769 v.calculate_urb_setup(); 3770 if (intel->gen < 6) 3771 v.emit_interpolation_setup_gen4(); 3772 else 3773 v.emit_interpolation_setup_gen6(); 3774 3775 /* Generate FS IR for main(). (the visitor only descends into 3776 * functions called "main"). 3777 */ 3778 foreach_iter(exec_list_iterator, iter, *shader->ir) { 3779 ir_instruction *ir = (ir_instruction *)iter.get(); 3780 v.base_ir = ir; 3781 ir->accept(&v); 3782 } 3783 3784 v.emit_fb_writes(); 3785 3786 v.split_virtual_grfs(); 3787 3788 v.setup_paramvalues_refs(); 3789 v.setup_pull_constants(); 3790 3791 bool progress; 3792 do { 3793 progress = false; 3794 3795 progress = v.remove_duplicate_mrf_writes() || progress; 3796 3797 progress = v.propagate_constants() || progress; 3798 progress = v.register_coalesce() || progress; 3799 progress = v.compute_to_mrf() || progress; 3800 progress = v.dead_code_eliminate() || progress; 3801 } while (progress); 3802 3803 v.schedule_instructions(); 3804 3805 v.assign_curb_setup(); 3806 v.assign_urb_setup(); 3807 3808 if (0) { 3809 /* Debug of register spilling: Go spill everything. */ 3810 int virtual_grf_count = v.virtual_grf_next; 3811 for (int i = 1; i < virtual_grf_count; i++) { 3812 v.spill_reg(i); 3813 } 3814 } 3815 3816 if (0) 3817 v.assign_regs_trivial(); 3818 else { 3819 while (!v.assign_regs()) { 3820 if (v.fail) 3821 break; 3822 } 3823 } 3824 } 3825 3826 if (!v.fail) 3827 v.generate_code(); 3828 3829 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */ 3830 3831 if (v.fail) 3832 return GL_FALSE; 3833 3834 c->prog_data.total_grf = v.grf_used; 3835 3836 return GL_TRUE; 3837} 3838