/* brw_fs.cpp — revision 7c7df146b59bae9dcb3a271bd3c671e273015617 */
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44#include "talloc.h" 45} 46#include "brw_fs.h" 47#include "../glsl/glsl_types.h" 48#include "../glsl/ir_optimization.h" 49#include "../glsl/ir_print_visitor.h" 50 51static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 52 53struct gl_shader * 54brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 55{ 56 struct brw_shader *shader; 57 58 shader = talloc_zero(NULL, struct brw_shader); 59 if (shader) { 60 shader->base.Type = type; 61 shader->base.Name = name; 62 _mesa_init_shader(ctx, &shader->base); 63 } 64 65 return &shader->base; 66} 67 68struct gl_shader_program * 69brw_new_shader_program(struct gl_context *ctx, GLuint name) 70{ 71 struct brw_shader_program *prog; 72 prog = talloc_zero(NULL, struct brw_shader_program); 73 if (prog) { 74 prog->base.Name = name; 75 _mesa_init_shader_program(ctx, &prog->base); 76 } 77 return &prog->base; 78} 79 80GLboolean 81brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) 82{ 83 if (!_mesa_ir_compile_shader(ctx, shader)) 84 return GL_FALSE; 85 86 return GL_TRUE; 87} 88 89GLboolean 90brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 91{ 92 struct brw_context *brw = brw_context(ctx); 93 struct intel_context *intel = &brw->intel; 94 95 struct brw_shader *shader = 96 (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 97 if (shader != NULL) { 98 void *mem_ctx = talloc_new(NULL); 99 bool progress; 100 101 if (shader->ir) 102 talloc_free(shader->ir); 103 shader->ir = new(shader) 
exec_list; 104 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 105 106 do_mat_op_to_vec(shader->ir); 107 lower_instructions(shader->ir, 108 MOD_TO_FRACT | 109 DIV_TO_MUL_RCP | 110 SUB_TO_ADD_NEG | 111 EXP_TO_EXP2 | 112 LOG_TO_LOG2); 113 114 /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, 115 * if-statements need to be flattened. 116 */ 117 if (intel->gen < 6) 118 lower_if_to_cond_assign(shader->ir, 16); 119 120 do_lower_texture_projection(shader->ir); 121 do_vec_index_to_cond_assign(shader->ir); 122 brw_do_cubemap_normalize(shader->ir); 123 124 do { 125 progress = false; 126 127 brw_do_channel_expressions(shader->ir); 128 brw_do_vector_splitting(shader->ir); 129 130 progress = do_lower_jumps(shader->ir, true, true, 131 true, /* main return */ 132 false, /* continue */ 133 false /* loops */ 134 ) || progress; 135 136 progress = do_common_optimization(shader->ir, true, 32) || progress; 137 138 progress = lower_noise(shader->ir) || progress; 139 progress = 140 lower_variable_index_to_cond_assign(shader->ir, 141 GL_TRUE, /* input */ 142 GL_TRUE, /* output */ 143 GL_TRUE, /* temp */ 144 GL_TRUE /* uniform */ 145 ) || progress; 146 progress = lower_quadop_vector(shader->ir, false) || progress; 147 } while (progress); 148 149 validate_ir_tree(shader->ir); 150 151 reparent_ir(shader->ir, shader->ir); 152 talloc_free(mem_ctx); 153 } 154 155 if (!_mesa_ir_link_shader(ctx, prog)) 156 return GL_FALSE; 157 158 return GL_TRUE; 159} 160 161static int 162type_size(const struct glsl_type *type) 163{ 164 unsigned int size, i; 165 166 switch (type->base_type) { 167 case GLSL_TYPE_UINT: 168 case GLSL_TYPE_INT: 169 case GLSL_TYPE_FLOAT: 170 case GLSL_TYPE_BOOL: 171 return type->components(); 172 case GLSL_TYPE_ARRAY: 173 return type_size(type->fields.array) * type->length; 174 case GLSL_TYPE_STRUCT: 175 size = 0; 176 for (i = 0; i < type->length; i++) { 177 size += type_size(type->fields.structure[i].type); 178 } 179 return size; 180 case GLSL_TYPE_SAMPLER: 181 /* 
Samplers take up no register space, since they're baked in at 182 * link time. 183 */ 184 return 0; 185 default: 186 assert(!"not reached"); 187 return 0; 188 } 189} 190 191/** 192 * Returns how many MRFs an FS opcode will write over. 193 * 194 * Note that this is not the 0 or 1 implied writes in an actual gen 195 * instruction -- the FS opcodes often generate MOVs in addition. 196 */ 197int 198fs_visitor::implied_mrf_writes(fs_inst *inst) 199{ 200 if (inst->mlen == 0) 201 return 0; 202 203 switch (inst->opcode) { 204 case FS_OPCODE_RCP: 205 case FS_OPCODE_RSQ: 206 case FS_OPCODE_SQRT: 207 case FS_OPCODE_EXP2: 208 case FS_OPCODE_LOG2: 209 case FS_OPCODE_SIN: 210 case FS_OPCODE_COS: 211 return 1; 212 case FS_OPCODE_POW: 213 return 2; 214 case FS_OPCODE_TEX: 215 case FS_OPCODE_TXB: 216 case FS_OPCODE_TXL: 217 return 1; 218 case FS_OPCODE_FB_WRITE: 219 return 2; 220 case FS_OPCODE_PULL_CONSTANT_LOAD: 221 case FS_OPCODE_UNSPILL: 222 return 1; 223 case FS_OPCODE_SPILL: 224 return 2; 225 default: 226 assert(!"not reached"); 227 return inst->mlen; 228 } 229} 230 231int 232fs_visitor::virtual_grf_alloc(int size) 233{ 234 if (virtual_grf_array_size <= virtual_grf_next) { 235 if (virtual_grf_array_size == 0) 236 virtual_grf_array_size = 16; 237 else 238 virtual_grf_array_size *= 2; 239 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, 240 int, virtual_grf_array_size); 241 242 /* This slot is always unused. */ 243 virtual_grf_sizes[0] = 0; 244 } 245 virtual_grf_sizes[virtual_grf_next] = size; 246 return virtual_grf_next++; 247} 248 249/** Fixed HW reg constructor. */ 250fs_reg::fs_reg(enum register_file file, int hw_reg) 251{ 252 init(); 253 this->file = file; 254 this->hw_reg = hw_reg; 255 this->type = BRW_REGISTER_TYPE_F; 256} 257 258/** Fixed HW reg constructor. 
*/ 259fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) 260{ 261 init(); 262 this->file = file; 263 this->hw_reg = hw_reg; 264 this->type = type; 265} 266 267int 268brw_type_for_base_type(const struct glsl_type *type) 269{ 270 switch (type->base_type) { 271 case GLSL_TYPE_FLOAT: 272 return BRW_REGISTER_TYPE_F; 273 case GLSL_TYPE_INT: 274 case GLSL_TYPE_BOOL: 275 return BRW_REGISTER_TYPE_D; 276 case GLSL_TYPE_UINT: 277 return BRW_REGISTER_TYPE_UD; 278 case GLSL_TYPE_ARRAY: 279 case GLSL_TYPE_STRUCT: 280 case GLSL_TYPE_SAMPLER: 281 /* These should be overridden with the type of the member when 282 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 283 * way to trip up if we don't. 284 */ 285 return BRW_REGISTER_TYPE_UD; 286 default: 287 assert(!"not reached"); 288 return BRW_REGISTER_TYPE_F; 289 } 290} 291 292/** Automatic reg constructor. */ 293fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 294{ 295 init(); 296 297 this->file = GRF; 298 this->reg = v->virtual_grf_alloc(type_size(type)); 299 this->reg_offset = 0; 300 this->type = brw_type_for_base_type(type); 301} 302 303fs_reg * 304fs_visitor::variable_storage(ir_variable *var) 305{ 306 return (fs_reg *)hash_table_find(this->variable_ht, var); 307} 308 309/* Our support for uniforms is piggy-backed on the struct 310 * gl_fragment_program, because that's where the values actually 311 * get stored, rather than in some global gl_shader_program uniform 312 * store. 
313 */ 314int 315fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 316{ 317 unsigned int offset = 0; 318 float *vec_values; 319 320 if (type->is_matrix()) { 321 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 322 type->vector_elements, 323 1); 324 325 for (unsigned int i = 0; i < type->matrix_columns; i++) { 326 offset += setup_uniform_values(loc + offset, column); 327 } 328 329 return offset; 330 } 331 332 switch (type->base_type) { 333 case GLSL_TYPE_FLOAT: 334 case GLSL_TYPE_UINT: 335 case GLSL_TYPE_INT: 336 case GLSL_TYPE_BOOL: 337 vec_values = fp->Base.Parameters->ParameterValues[loc]; 338 for (unsigned int i = 0; i < type->vector_elements; i++) { 339 unsigned int param = c->prog_data.nr_params++; 340 341 assert(param < ARRAY_SIZE(c->prog_data.param)); 342 343 switch (type->base_type) { 344 case GLSL_TYPE_FLOAT: 345 c->prog_data.param_convert[param] = PARAM_NO_CONVERT; 346 break; 347 case GLSL_TYPE_UINT: 348 c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; 349 break; 350 case GLSL_TYPE_INT: 351 c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; 352 break; 353 case GLSL_TYPE_BOOL: 354 c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; 355 break; 356 default: 357 assert(!"not reached"); 358 c->prog_data.param_convert[param] = PARAM_NO_CONVERT; 359 break; 360 } 361 362 c->prog_data.param[param] = &vec_values[i]; 363 } 364 return 1; 365 366 case GLSL_TYPE_STRUCT: 367 for (unsigned int i = 0; i < type->length; i++) { 368 offset += setup_uniform_values(loc + offset, 369 type->fields.structure[i].type); 370 } 371 return offset; 372 373 case GLSL_TYPE_ARRAY: 374 for (unsigned int i = 0; i < type->length; i++) { 375 offset += setup_uniform_values(loc + offset, type->fields.array); 376 } 377 return offset; 378 379 case GLSL_TYPE_SAMPLER: 380 /* The sampler takes up a slot, but we don't use any values from it. 
*/ 381 return 1; 382 383 default: 384 assert(!"not reached"); 385 return 0; 386 } 387} 388 389 390/* Our support for builtin uniforms is even scarier than non-builtin. 391 * It sits on top of the PROG_STATE_VAR parameters that are 392 * automatically updated from GL context state. 393 */ 394void 395fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 396{ 397 const struct gl_builtin_uniform_desc *statevar = NULL; 398 399 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 400 statevar = &_mesa_builtin_uniform_desc[i]; 401 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 402 break; 403 } 404 405 if (!statevar->name) { 406 this->fail = true; 407 printf("Failed to find builtin uniform `%s'\n", ir->name); 408 return; 409 } 410 411 int array_count; 412 if (ir->type->is_array()) { 413 array_count = ir->type->length; 414 } else { 415 array_count = 1; 416 } 417 418 for (int a = 0; a < array_count; a++) { 419 for (unsigned int i = 0; i < statevar->num_elements; i++) { 420 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 421 int tokens[STATE_LENGTH]; 422 423 memcpy(tokens, element->tokens, sizeof(element->tokens)); 424 if (ir->type->is_array()) { 425 tokens[1] = a; 426 } 427 428 /* This state reference has already been setup by ir_to_mesa, 429 * but we'll get the same index back here. 430 */ 431 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 432 (gl_state_index *)tokens); 433 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 434 435 /* Add each of the unique swizzles of the element as a 436 * parameter. This'll end up matching the expected layout of 437 * the array/matrix/structure we're trying to fill in. 
438 */ 439 int last_swiz = -1; 440 for (unsigned int i = 0; i < 4; i++) { 441 int swiz = GET_SWZ(element->swizzle, i); 442 if (swiz == last_swiz) 443 break; 444 last_swiz = swiz; 445 446 c->prog_data.param_convert[c->prog_data.nr_params] = 447 PARAM_NO_CONVERT; 448 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; 449 } 450 } 451 } 452} 453 454fs_reg * 455fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 456{ 457 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 458 fs_reg wpos = *reg; 459 fs_reg neg_y = this->pixel_y; 460 neg_y.negate = true; 461 bool flip = !ir->origin_upper_left ^ c->key.render_to_fbo; 462 463 /* gl_FragCoord.x */ 464 if (ir->pixel_center_integer) { 465 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 466 } else { 467 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 468 } 469 wpos.reg_offset++; 470 471 /* gl_FragCoord.y */ 472 if (!flip && ir->pixel_center_integer) { 473 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 474 } else { 475 fs_reg pixel_y = this->pixel_y; 476 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 477 478 if (flip) { 479 pixel_y.negate = true; 480 offset += c->key.drawable_height - 1.0; 481 } 482 483 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 484 } 485 wpos.reg_offset++; 486 487 /* gl_FragCoord.z */ 488 if (intel->gen >= 6) { 489 emit(fs_inst(BRW_OPCODE_MOV, wpos, 490 fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); 491 } else { 492 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 493 interp_reg(FRAG_ATTRIB_WPOS, 2))); 494 } 495 wpos.reg_offset++; 496 497 /* gl_FragCoord.w: Already set up in emit_interpolation */ 498 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 499 500 return reg; 501} 502 503fs_reg * 504fs_visitor::emit_general_interpolation(ir_variable *ir) 505{ 506 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 507 /* Interpolation is always in floating point regs. 
*/ 508 reg->type = BRW_REGISTER_TYPE_F; 509 fs_reg attr = *reg; 510 511 unsigned int array_elements; 512 const glsl_type *type; 513 514 if (ir->type->is_array()) { 515 array_elements = ir->type->length; 516 if (array_elements == 0) { 517 this->fail = true; 518 } 519 type = ir->type->fields.array; 520 } else { 521 array_elements = 1; 522 type = ir->type; 523 } 524 525 int location = ir->location; 526 for (unsigned int i = 0; i < array_elements; i++) { 527 for (unsigned int j = 0; j < type->matrix_columns; j++) { 528 if (urb_setup[location] == -1) { 529 /* If there's no incoming setup data for this slot, don't 530 * emit interpolation for it. 531 */ 532 attr.reg_offset += type->vector_elements; 533 location++; 534 continue; 535 } 536 537 if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 || 538 location == FRAG_ATTRIB_COL1)) { 539 /* Constant interpolation (flat shading) case. The SF has 540 * handed us defined values in only the constant offset 541 * field of the setup reg. 542 */ 543 for (unsigned int c = 0; c < type->vector_elements; c++) { 544 struct brw_reg interp = interp_reg(location, c); 545 interp = suboffset(interp, 3); 546 emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp))); 547 attr.reg_offset++; 548 } 549 } else { 550 /* Perspective interpolation case. 
*/ 551 for (unsigned int c = 0; c < type->vector_elements; c++) { 552 struct brw_reg interp = interp_reg(location, c); 553 emit(fs_inst(FS_OPCODE_LINTERP, 554 attr, 555 this->delta_x, 556 this->delta_y, 557 fs_reg(interp))); 558 attr.reg_offset++; 559 } 560 561 if (intel->gen < 6) { 562 attr.reg_offset -= type->vector_elements; 563 for (unsigned int c = 0; c < type->vector_elements; c++) { 564 emit(fs_inst(BRW_OPCODE_MUL, 565 attr, 566 attr, 567 this->pixel_w)); 568 attr.reg_offset++; 569 } 570 } 571 } 572 location++; 573 } 574 } 575 576 return reg; 577} 578 579fs_reg * 580fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) 581{ 582 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 583 584 /* The frontfacing comes in as a bit in the thread payload. */ 585 if (intel->gen >= 6) { 586 emit(fs_inst(BRW_OPCODE_ASR, 587 *reg, 588 fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), 589 fs_reg(15))); 590 emit(fs_inst(BRW_OPCODE_NOT, 591 *reg, 592 *reg)); 593 emit(fs_inst(BRW_OPCODE_AND, 594 *reg, 595 *reg, 596 fs_reg(1))); 597 } else { 598 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 599 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 600 * us front face 601 */ 602 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 603 *reg, 604 fs_reg(r1_6ud), 605 fs_reg(1u << 31))); 606 inst->conditional_mod = BRW_CONDITIONAL_L; 607 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 608 } 609 610 return reg; 611} 612 613fs_inst * 614fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) 615{ 616 switch (opcode) { 617 case FS_OPCODE_RCP: 618 case FS_OPCODE_RSQ: 619 case FS_OPCODE_SQRT: 620 case FS_OPCODE_EXP2: 621 case FS_OPCODE_LOG2: 622 case FS_OPCODE_SIN: 623 case FS_OPCODE_COS: 624 break; 625 default: 626 assert(!"not reached: bad math opcode"); 627 return NULL; 628 } 629 630 /* Can't do hstride == 0 args to gen6 math, so expand it out. 
We 631 * might be able to do better by doing execsize = 1 math and then 632 * expanding that result out, but we would need to be careful with 633 * masking. 634 * 635 * The hardware ignores source modifiers (negate and abs) on math 636 * instructions, so we also move to a temp to set those up. 637 */ 638 if (intel->gen >= 6 && (src.file == UNIFORM || 639 src.abs || 640 src.negate)) { 641 fs_reg expanded = fs_reg(this, glsl_type::float_type); 642 emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); 643 src = expanded; 644 } 645 646 fs_inst *inst = emit(fs_inst(opcode, dst, src)); 647 648 if (intel->gen < 6) { 649 inst->base_mrf = 2; 650 inst->mlen = 1; 651 } 652 653 return inst; 654} 655 656fs_inst * 657fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) 658{ 659 int base_mrf = 2; 660 fs_inst *inst; 661 662 assert(opcode == FS_OPCODE_POW); 663 664 if (intel->gen >= 6) { 665 /* Can't do hstride == 0 args to gen6 math, so expand it out. */ 666 if (src0.file == UNIFORM) { 667 fs_reg expanded = fs_reg(this, glsl_type::float_type); 668 emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); 669 src0 = expanded; 670 } 671 672 if (src1.file == UNIFORM) { 673 fs_reg expanded = fs_reg(this, glsl_type::float_type); 674 emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); 675 src1 = expanded; 676 } 677 678 inst = emit(fs_inst(opcode, dst, src0, src1)); 679 } else { 680 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1)); 681 inst = emit(fs_inst(opcode, dst, src0, reg_null_f)); 682 683 inst->base_mrf = base_mrf; 684 inst->mlen = 2; 685 } 686 return inst; 687} 688 689void 690fs_visitor::visit(ir_variable *ir) 691{ 692 fs_reg *reg = NULL; 693 694 if (variable_storage(ir)) 695 return; 696 697 if (strcmp(ir->name, "gl_FragColor") == 0) { 698 this->frag_color = ir; 699 } else if (strcmp(ir->name, "gl_FragData") == 0) { 700 this->frag_data = ir; 701 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 702 this->frag_depth = ir; 703 } 704 705 if (ir->mode == ir_var_in) 
{ 706 if (!strcmp(ir->name, "gl_FragCoord")) { 707 reg = emit_fragcoord_interpolation(ir); 708 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 709 reg = emit_frontfacing_interpolation(ir); 710 } else { 711 reg = emit_general_interpolation(ir); 712 } 713 assert(reg); 714 hash_table_insert(this->variable_ht, reg, ir); 715 return; 716 } 717 718 if (ir->mode == ir_var_uniform) { 719 int param_index = c->prog_data.nr_params; 720 721 if (!strncmp(ir->name, "gl_", 3)) { 722 setup_builtin_uniform_values(ir); 723 } else { 724 setup_uniform_values(ir->location, ir->type); 725 } 726 727 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 728 reg->type = brw_type_for_base_type(ir->type); 729 } 730 731 if (!reg) 732 reg = new(this->mem_ctx) fs_reg(this, ir->type); 733 734 hash_table_insert(this->variable_ht, reg, ir); 735} 736 737void 738fs_visitor::visit(ir_dereference_variable *ir) 739{ 740 fs_reg *reg = variable_storage(ir->var); 741 this->result = *reg; 742} 743 744void 745fs_visitor::visit(ir_dereference_record *ir) 746{ 747 const glsl_type *struct_type = ir->record->type; 748 749 ir->record->accept(this); 750 751 unsigned int offset = 0; 752 for (unsigned int i = 0; i < struct_type->length; i++) { 753 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 754 break; 755 offset += type_size(struct_type->fields.structure[i].type); 756 } 757 this->result.reg_offset += offset; 758 this->result.type = brw_type_for_base_type(ir->type); 759} 760 761void 762fs_visitor::visit(ir_dereference_array *ir) 763{ 764 ir_constant *index; 765 int element_size; 766 767 ir->array->accept(this); 768 index = ir->array_index->as_constant(); 769 770 element_size = type_size(ir->type); 771 this->result.type = brw_type_for_base_type(ir->type); 772 773 if (index) { 774 assert(this->result.file == UNIFORM || 775 (this->result.file == GRF && 776 this->result.reg != 0)); 777 this->result.reg_offset += index->value.i[0] * element_size; 778 } else { 779 assert(!"FINISHME: non-constant 
array element"); 780 } 781} 782 783/* Instruction selection: Produce a MOV.sat instead of 784 * MIN(MAX(val, 0), 1) when possible. 785 */ 786bool 787fs_visitor::try_emit_saturate(ir_expression *ir) 788{ 789 ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); 790 791 if (!sat_val) 792 return false; 793 794 sat_val->accept(this); 795 fs_reg src = this->result; 796 797 this->result = fs_reg(this, ir->type); 798 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, src)); 799 inst->saturate = true; 800 801 return true; 802} 803 804static uint32_t 805brw_conditional_for_comparison(unsigned int op) 806{ 807 switch (op) { 808 case ir_binop_less: 809 return BRW_CONDITIONAL_L; 810 case ir_binop_greater: 811 return BRW_CONDITIONAL_G; 812 case ir_binop_lequal: 813 return BRW_CONDITIONAL_LE; 814 case ir_binop_gequal: 815 return BRW_CONDITIONAL_GE; 816 case ir_binop_equal: 817 case ir_binop_all_equal: /* same as equal for scalars */ 818 return BRW_CONDITIONAL_Z; 819 case ir_binop_nequal: 820 case ir_binop_any_nequal: /* same as nequal for scalars */ 821 return BRW_CONDITIONAL_NZ; 822 default: 823 assert(!"not reached: bad operation for comparison"); 824 return BRW_CONDITIONAL_NZ; 825 } 826} 827 828void 829fs_visitor::visit(ir_expression *ir) 830{ 831 unsigned int operand; 832 fs_reg op[2], temp; 833 fs_inst *inst; 834 835 assert(ir->get_num_operands() <= 2); 836 837 if (try_emit_saturate(ir)) 838 return; 839 840 for (operand = 0; operand < ir->get_num_operands(); operand++) { 841 ir->operands[operand]->accept(this); 842 if (this->result.file == BAD_FILE) { 843 ir_print_visitor v; 844 printf("Failed to get tree for expression operand:\n"); 845 ir->operands[operand]->accept(&v); 846 this->fail = true; 847 } 848 op[operand] = this->result; 849 850 /* Matrix expression operands should have been broken down to vector 851 * operations already. 852 */ 853 assert(!ir->operands[operand]->type->is_matrix()); 854 /* And then those vector operands should have been broken down to scalar. 
855 */ 856 assert(!ir->operands[operand]->type->is_vector()); 857 } 858 859 /* Storage for our result. If our result goes into an assignment, it will 860 * just get copy-propagated out, so no worries. 861 */ 862 this->result = fs_reg(this, ir->type); 863 864 switch (ir->operation) { 865 case ir_unop_logic_not: 866 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 867 * ones complement of the whole register, not just bit 0. 868 */ 869 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1))); 870 break; 871 case ir_unop_neg: 872 op[0].negate = !op[0].negate; 873 this->result = op[0]; 874 break; 875 case ir_unop_abs: 876 op[0].abs = true; 877 op[0].negate = false; 878 this->result = op[0]; 879 break; 880 case ir_unop_sign: 881 temp = fs_reg(this, ir->type); 882 883 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 884 885 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 886 inst->conditional_mod = BRW_CONDITIONAL_G; 887 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 888 inst->predicated = true; 889 890 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 891 inst->conditional_mod = BRW_CONDITIONAL_L; 892 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 893 inst->predicated = true; 894 895 break; 896 case ir_unop_rcp: 897 emit_math(FS_OPCODE_RCP, this->result, op[0]); 898 break; 899 900 case ir_unop_exp2: 901 emit_math(FS_OPCODE_EXP2, this->result, op[0]); 902 break; 903 case ir_unop_log2: 904 emit_math(FS_OPCODE_LOG2, this->result, op[0]); 905 break; 906 case ir_unop_exp: 907 case ir_unop_log: 908 assert(!"not reached: should be handled by ir_explog_to_explog2"); 909 break; 910 case ir_unop_sin: 911 case ir_unop_sin_reduced: 912 emit_math(FS_OPCODE_SIN, this->result, op[0]); 913 break; 914 case ir_unop_cos: 915 case ir_unop_cos_reduced: 916 emit_math(FS_OPCODE_COS, this->result, op[0]); 917 break; 918 919 case ir_unop_dFdx: 920 emit(fs_inst(FS_OPCODE_DDX, 
this->result, op[0])); 921 break; 922 case ir_unop_dFdy: 923 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 924 break; 925 926 case ir_binop_add: 927 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 928 break; 929 case ir_binop_sub: 930 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 931 break; 932 933 case ir_binop_mul: 934 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 935 break; 936 case ir_binop_div: 937 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 938 break; 939 case ir_binop_mod: 940 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 941 break; 942 943 case ir_binop_less: 944 case ir_binop_greater: 945 case ir_binop_lequal: 946 case ir_binop_gequal: 947 case ir_binop_equal: 948 case ir_binop_all_equal: 949 case ir_binop_nequal: 950 case ir_binop_any_nequal: 951 temp = this->result; 952 /* original gen4 does implicit conversion before comparison. */ 953 if (intel->gen < 5) 954 temp.type = op[0].type; 955 956 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1])); 957 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 958 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 959 break; 960 961 case ir_binop_logic_xor: 962 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 963 break; 964 965 case ir_binop_logic_or: 966 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 967 break; 968 969 case ir_binop_logic_and: 970 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 971 break; 972 973 case ir_binop_dot: 974 case ir_unop_any: 975 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 976 break; 977 978 case ir_unop_noise: 979 assert(!"not reached: should be handled by lower_noise"); 980 break; 981 982 case ir_quadop_vector: 983 assert(!"not reached: should be handled by lower_quadop_vector"); 984 break; 985 986 case ir_unop_sqrt: 987 emit_math(FS_OPCODE_SQRT, this->result, op[0]); 988 break; 989 990 case 
ir_unop_rsq: 991 emit_math(FS_OPCODE_RSQ, this->result, op[0]); 992 break; 993 994 case ir_unop_i2f: 995 case ir_unop_b2f: 996 case ir_unop_b2i: 997 case ir_unop_f2i: 998 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 999 break; 1000 case ir_unop_f2b: 1001 case ir_unop_i2b: 1002 temp = this->result; 1003 /* original gen4 does implicit conversion before comparison. */ 1004 if (intel->gen < 5) 1005 temp.type = op[0].type; 1006 1007 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f))); 1008 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1009 inst = emit(fs_inst(BRW_OPCODE_AND, this->result, 1010 this->result, fs_reg(1))); 1011 break; 1012 1013 case ir_unop_trunc: 1014 emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0])); 1015 break; 1016 case ir_unop_ceil: 1017 op[0].negate = !op[0].negate; 1018 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1019 this->result.negate = true; 1020 break; 1021 case ir_unop_floor: 1022 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1023 break; 1024 case ir_unop_fract: 1025 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 1026 break; 1027 case ir_unop_round_even: 1028 emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0])); 1029 break; 1030 1031 case ir_binop_min: 1032 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1033 inst->conditional_mod = BRW_CONDITIONAL_L; 1034 1035 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1036 inst->predicated = true; 1037 break; 1038 case ir_binop_max: 1039 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1040 inst->conditional_mod = BRW_CONDITIONAL_G; 1041 1042 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1043 inst->predicated = true; 1044 break; 1045 1046 case ir_binop_pow: 1047 emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); 1048 break; 1049 1050 case ir_unop_bit_not: 1051 inst = emit(fs_inst(BRW_OPCODE_NOT, this->result, op[0])); 1052 break; 1053 case ir_binop_bit_and: 1054 inst = 
emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 1055 break; 1056 case ir_binop_bit_xor: 1057 inst = emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 1058 break; 1059 case ir_binop_bit_or: 1060 inst = emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 1061 break; 1062 1063 case ir_unop_u2f: 1064 case ir_binop_lshift: 1065 case ir_binop_rshift: 1066 assert(!"GLSL 1.30 features unsupported"); 1067 break; 1068 } 1069} 1070 1071void 1072fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 1073 const glsl_type *type, bool predicated) 1074{ 1075 switch (type->base_type) { 1076 case GLSL_TYPE_FLOAT: 1077 case GLSL_TYPE_UINT: 1078 case GLSL_TYPE_INT: 1079 case GLSL_TYPE_BOOL: 1080 for (unsigned int i = 0; i < type->components(); i++) { 1081 l.type = brw_type_for_base_type(type); 1082 r.type = brw_type_for_base_type(type); 1083 1084 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1085 inst->predicated = predicated; 1086 1087 l.reg_offset++; 1088 r.reg_offset++; 1089 } 1090 break; 1091 case GLSL_TYPE_ARRAY: 1092 for (unsigned int i = 0; i < type->length; i++) { 1093 emit_assignment_writes(l, r, type->fields.array, predicated); 1094 } 1095 break; 1096 1097 case GLSL_TYPE_STRUCT: 1098 for (unsigned int i = 0; i < type->length; i++) { 1099 emit_assignment_writes(l, r, type->fields.structure[i].type, 1100 predicated); 1101 } 1102 break; 1103 1104 case GLSL_TYPE_SAMPLER: 1105 break; 1106 1107 default: 1108 assert(!"not reached"); 1109 break; 1110 } 1111} 1112 1113void 1114fs_visitor::visit(ir_assignment *ir) 1115{ 1116 struct fs_reg l, r; 1117 fs_inst *inst; 1118 1119 /* FINISHME: arrays on the lhs */ 1120 ir->lhs->accept(this); 1121 l = this->result; 1122 1123 ir->rhs->accept(this); 1124 r = this->result; 1125 1126 assert(l.file != BAD_FILE); 1127 assert(r.file != BAD_FILE); 1128 1129 if (ir->condition) { 1130 emit_bool_to_cond_code(ir->condition); 1131 } 1132 1133 if (ir->lhs->type->is_scalar() || 1134 ir->lhs->type->is_vector()) { 1135 for 
(int i = 0; i < ir->lhs->type->vector_elements; i++) { 1136 if (ir->write_mask & (1 << i)) { 1137 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1138 if (ir->condition) 1139 inst->predicated = true; 1140 r.reg_offset++; 1141 } 1142 l.reg_offset++; 1143 } 1144 } else { 1145 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 1146 } 1147} 1148 1149fs_inst * 1150fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1151{ 1152 int mlen; 1153 int base_mrf = 1; 1154 bool simd16 = false; 1155 fs_reg orig_dst; 1156 1157 /* g0 header. */ 1158 mlen = 1; 1159 1160 if (ir->shadow_comparitor) { 1161 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1162 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1163 coordinate)); 1164 coordinate.reg_offset++; 1165 } 1166 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1167 mlen += 3; 1168 1169 if (ir->op == ir_tex) { 1170 /* There's no plain shadow compare message, so we use shadow 1171 * compare with a bias of 0.0. 1172 */ 1173 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1174 fs_reg(0.0f))); 1175 mlen++; 1176 } else if (ir->op == ir_txb) { 1177 ir->lod_info.bias->accept(this); 1178 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1179 this->result)); 1180 mlen++; 1181 } else { 1182 assert(ir->op == ir_txl); 1183 ir->lod_info.lod->accept(this); 1184 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1185 this->result)); 1186 mlen++; 1187 } 1188 1189 ir->shadow_comparitor->accept(this); 1190 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1191 mlen++; 1192 } else if (ir->op == ir_tex) { 1193 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1194 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1195 coordinate)); 1196 coordinate.reg_offset++; 1197 } 1198 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1199 mlen += 3; 1200 } else { 1201 /* Oh joy. 
gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      assert(ir->op == ir_txb || ir->op == ir_txl);

      /* SIMD16 payload: each coordinate component occupies two MRFs. */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2),
                      coordinate));
         coordinate.reg_offset++;
      }

      /* lod/bias appears after u/v/r. */
      mlen += 6;

      if (ir->op == ir_txb) {
         ir->lod_info.bias->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      } else {
         ir->lod_info.lod->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      }

      /* The unused upper half. */
      mlen++;

      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk.  We'll need to move
       * this weirdness around to the expected layout.
       */
      simd16 = true;
      orig_dst = dst;
      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
                                                       2));
      dst.type = BRW_REGISTER_TYPE_F;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst));
      break;
   case ir_txb:
      inst = emit(fs_inst(FS_OPCODE_TXB, dst));
      break;
   case ir_txl:
      inst = emit(fs_inst(FS_OPCODE_TXL, dst));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;

   if (simd16) {
      /* Copy the even-indexed (valid) vec4 channels back to the
       * caller's SIMD8 destination.
       */
      for (int i = 0; i < 4; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
         orig_dst.reg_offset++;
         dst.reg_offset += 2;
      }
   }

   return inst;
}

/* Build the gen5 sampler message payload and emit the texturing
 * instruction.  Returns the instruction for the caller to finish
 * (sampler index, shadow flag, etc.).
 */
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
    * optional parameters like shadow comparitor or LOD bias.  If
    * optional parameters aren't present, those base slots are
    * optional and don't need to be included in the message.
    *
    * We don't fill in the unnecessary slots regardless, which may
    * look surprising in the disassembly.
    */
   int mlen = 1; /* g0 header always present. */
   int base_mrf = 1;

   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
                   coordinate));
      coordinate.reg_offset++;
   }
   mlen += ir->coordinate->type->vector_elements;

   if (ir->shadow_comparitor) {
      /* Skip over the unused u/v/r/ai base slots if the coordinate
       * didn't fill them (optional params start at slot 5).
       */
      mlen = MAX2(mlen, 5);

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst));
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      mlen = MAX2(mlen, 5);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      mlen = MAX2(mlen, 5);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;

   return inst;
}

/* Visit a texture fetch: evaluate the coordinate, apply rectangle
 * scaling if needed, dispatch to the per-generation payload builder,
 * and apply any compile-key texture swizzle to the result.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   int sampler;
   fs_inst *inst = NULL;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   sampler =
_mesa_get_sampler_uniform_value(ir->sampler,
                                             ctx->Shader.CurrentFragmentProgram,
                                             &brw->fragment_program->Base);
   /* Map the shader's sampler uniform to the bound texture unit. */
   sampler = c->fp->program.Base.SamplerUnits[sampler];

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
      struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
      int tokens[STATE_LENGTH] = {
         STATE_INTERNAL,
         STATE_TEXRECT_SCALE,
         sampler,
         0,
         0
      };

      /* The two scale factors are passed through raw (no conversion). */
      c->prog_data.param_convert[c->prog_data.nr_params] =
         PARAM_NO_CONVERT;
      c->prog_data.param_convert[c->prog_data.nr_params + 1] =
         PARAM_NO_CONVERT;

      fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
      fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
      GLuint index = _mesa_add_state_reference(params,
                                               (gl_state_index *)tokens);
      float *vec_values = this->fp->Base.Parameters->ParameterValues[index];

      c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0];
      c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1];

      /* Scale u and v into a fresh coordinate register. */
      fs_reg dst = fs_reg(this, ir->coordinate->type);
      fs_reg src = coordinate;
      coordinate = dst;

      emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x));
      dst.reg_offset++;
      src.reg_offset++;
      emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y));
   }

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   if (intel->gen < 5) {
      inst = emit_texture_gen4(ir, dst, coordinate);
   } else {
      inst = emit_texture_gen5(ir, dst, coordinate);
   }

   inst->sampler = sampler;

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   /* Apply the compile-key swizzle (e.g. for depth-texture modes) by
    * shuffling the returned vec4 into a new register.
    */
   if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
      fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
         int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
         fs_reg l = swizzle_dst;
         l.reg_offset += i;

         if (swiz == SWIZZLE_ZERO) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
         } else if (swiz == SWIZZLE_ONE) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
         } else {
            fs_reg r = dst;
            r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
            emit(fs_inst(BRW_OPCODE_MOV, l, r));
         }
      }
      this->result = swizzle_dst;
   }
}

/* Visit a swizzle.  A single-component swizzle is a no-copy register
 * offset; otherwise MOV each selected channel into a new register.
 */
void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

/* Visit a discard (fragment kill).  Unconditional only for now; the
 * two-step NOT/AND sequence updates the kill mask via a temporary.
 */
void
fs_visitor::visit(ir_discard *ir)
{
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */
   emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null_d));
   emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null_d, temp));
   /* Remember that this shader kills, so the FB write setup keeps the
    * header needed for the pixel mask.
    */
   kill_emitted = true;
}

/* Visit a constant: allocate a register and MOV each immediate scalar
 * (or each array element / struct field, recursively) into it.
 */
void
fs_visitor::visit(ir_constant *ir)
{
   /* Set this->result to reg at the bottom of the function because some code
    * paths will cause this visitor to be applied to other fields.  This will
    * cause the value stored in this->result to be modified.
    *
    * Make reg constant so that it doesn't get accidentally modified along the
    * way.  Yes, I actually had this problem. :(
    */
   const fs_reg reg(this, ir->type);
   fs_reg dst_reg = reg;

   if (ir->type->is_array()) {
      const unsigned size = type_size(ir->type->fields.array);

      for (unsigned i = 0; i < ir->type->length; i++) {
         ir->array_elements[i]->accept(this);
         fs_reg src_reg = this->result;

         dst_reg.type = src_reg.type;
         for (unsigned j = 0; j < size; j++) {
            emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg));
            src_reg.reg_offset++;
            dst_reg.reg_offset++;
         }
      }
   } else if (ir->type->is_record()) {
      foreach_list(node, &ir->components) {
         ir_instruction *const field = (ir_instruction *) node;
         const unsigned size = type_size(field->type);

         field->accept(this);
         fs_reg src_reg = this->result;

         dst_reg.type = src_reg.type;
         for (unsigned j = 0; j < size; j++) {
            emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg));
            src_reg.reg_offset++;
            dst_reg.reg_offset++;
         }
      }
   } else {
      const unsigned size = type_size(ir->type);

      for (unsigned i = 0; i < size; i++) {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])));
            break;
         case GLSL_TYPE_UINT:
            emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])));
            break;
         case GLSL_TYPE_INT:
            emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])));
            break;
         case GLSL_TYPE_BOOL:
            emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])));
            break;
         default:
            assert(!"Non-float/uint/int/bool constant");
         }
         dst_reg.reg_offset++;
      }
   }

   this->result = reg;
}

/* Evaluate a boolean rvalue and leave the result in the condition
 * code (flag register) rather than a GRF, folding the comparison into
 * the flag-setting instruction when the expression form allows it.
 */
void
fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* AND with 1 and test for zero: flag set when the bool is false. */
         inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(fs_inst(BRW_OPCODE_OR, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            /* gen6 can't set flags from a float MOV; use CMP against 0.0. */
            inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d,
                                op[0], fs_reg(0.0f)));
         } else {
            inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0]));
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)));
         } else {
            inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0]));
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case
ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         /* Fold the comparison directly into a flag-setting CMP. */
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]));
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      default:
         assert(!"not reached");
         this->fail = true;
         break;
      }
      return;
   }

   /* Not a foldable expression: evaluate it to a register, then test
    * the boolean bit.
    */
   ir->accept(this);

   if (intel->gen >= 6) {
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d,
                                   this->result, fs_reg(1)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, this->result));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}

/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
fs_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;
      fs_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* NOTE(review): `temp` is still default-constructed here,
          * unlike the logic_or/logic_and cases below which allocate it
          * before use — confirm the IF destination is intended to be
          * this (undef) register rather than reg_null_d.
          */
         inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         return;

      case ir_binop_logic_xor:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_or:
         /* OR the operands into a temp, then IF on temp != 0. */
         temp = fs_reg(this, glsl_type::bool_type);
         emit(fs_inst(BRW_OPCODE_OR, temp, op[0], op[1]));
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_and:
         temp = fs_reg(this, glsl_type::bool_type);
         emit(fs_inst(BRW_OPCODE_AND, temp, op[0], op[1]));
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_f2b:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_i2b:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         return;
      default:
         assert(!"not reached");
         inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         this->fail = true;
         return;
      }
      return;
   }

   ir->condition->accept(this);

   fs_inst *inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}

/* Visit an if: emit IF (comparison-folded on gen6, predicated on the
 * condition code otherwise), then the then/else bodies and ENDIF.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen >= 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);

      inst = emit(fs_inst(BRW_OPCODE_IF));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

/* Visit a loop: emit DO/WHILE, with optional counter initialization,
 * a predicated BREAK on the exit comparison, and counter increment.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      /* Compare the counter against the bound; BREAK when it trips. */
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp,
                                   counter, this->result));
      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

/* Visit break/continue: map directly to the hardware opcodes. */
void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

/* Calls should have been inlined away before codegen. */
void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

/* Returns outside main() should have been lowered away. */
void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

/* Signatures are only walked via ir_function above. */
void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

/* Append a copy of \c inst to the instruction list, tagging it with
 * the current annotation and IR for debug output.  Returns the
 * heap-allocated copy so callers can set flags on it.
 */
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color.
    */
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 2),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 3),
                fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 4),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 5),
                fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
                        fs_reg(0),
                        fs_reg(0)));
   write->base_mrf = 0;
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   /* Two registers per attribute; odd channels live at byte offset 4. */
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* Expand the per-subspan X/Y in g1 to per-pixel centers using the
    * immediate-vector offsets of a 2x2 subspan.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      /* PLN wants delta_x/delta_y in adjacent registers. */
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
                int_pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                int_pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   /* As of gen6, we can no longer mix float and int sources.  We have
    * to turn the integer pixel centers into floats for their actual
    * use.
    */
   this->pixel_x = fs_reg(this, glsl_type::float_type);
   this->pixel_y = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x));
   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));

   this->current_annotation = "compute 1/pos.w";
   /* On gen6, W comes interpolated in the payload; just take 1/W. */
   this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);

   /* Barycentric deltas are delivered in g2/g3 of the payload. */
   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
   this->delta_y = fs_reg(brw_vec8_grf(3, 0));

   this->current_annotation = NULL;
}

/* Assemble the framebuffer-write message payload (header, AA/stencil,
 * color, depth) in MRFs and emit one FB_WRITE per color region, with
 * EOT on the last one.
 */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   GLboolean header_present = GL_TRUE;
   int nr = 0;

   /* The header can be skipped on gen6 for the simple single-RT,
    * no-kill case.
    */
   if (intel->gen >= 6 &&
       !this->kill_emitted &&
       c->key.nr_color_regions == 1) {
      header_present = false;
   }

   if (header_present) {
      /* m0, m1 header */
      nr += 2;
   }

   if (c->aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->source_depth_to_render_target) {
      if (c->computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
         /* Pass through the payload depth.
          */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                      fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
      }
   }

   if (c->dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data) {
      color = *(variable_storage(this->frag_data));
      color.type = BRW_REGISTER_TYPE_F;
   }

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      if (this->frag_color || this->frag_data) {
         for (int i = 0; i < 4; i++) {
            emit(fs_inst(BRW_OPCODE_MOV,
                         fs_reg(MRF, color_mrf + i),
                         color));
            color.reg_offset++;
         }
      }

      /* gl_FragColor is replicated to every target; gl_FragData
       * advances per target, so only rewind for frag_color.
       */
      if (this->frag_color)
         color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->target = target;
      inst->base_mrf = 0;
      inst->mlen = nr;
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
      inst->header_present = header_present;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even with no color buffers bound, a terminating (EOT) write
       * must still be sent.
       */
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->base_mrf = 0;
      inst->mlen = nr;
      inst->eot = true;
      inst->header_present = header_present;
   }

   this->current_annotation = NULL;
}

/* Generate native code for one FB_WRITE: fill in the message header
 * (when present) and issue the render-target write send.
 */
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;
   struct brw_reg implied_header;

   /* Header is 2 regs, g0 and g1 are the contents.  g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
         /* On gen6 the header is built by hand from g0. */
         brw_MOV(p,
                 brw_message_reg(inst->base_mrf),
                 brw_vec8_grf(0, 0));

         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
                              BRW_REGISTER_TYPE_UD),
                    brw_imm_ud(inst->target));
         }

         /* Clear viewport index, render target array index. */
         brw_AND(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 0),
                           BRW_REGISTER_TYPE_UD),
                 retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
                 brw_imm_ud(0xf7ff));

         implied_header = brw_null_reg();
      } else {
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }

      brw_MOV(p,
              brw_message_reg(inst->base_mrf + 1),
              brw_vec8_grf(1, 0));
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                inst->base_mrf,
                implied_header,
                inst->target,
                inst->mlen,
                0,
                eot,
                inst->header_present);
}

/* Generate attribute interpolation: a single PLN when the hardware
 * has it and the deltas are laid out for it, otherwise LINE+MAC.
 */
void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   /* PLN needs adjacent delta registers; pre-gen6 additionally needs
    * the pair to start on an even register.
    */
   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

/* Generate a math-unit operation, mapping our FS opcode to the
 * hardware function.  gen6 math takes register sources directly;
 * earlier gens send operands through the MRF.
 */
void
fs_visitor::generate_math(fs_inst *inst,
                          struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   if (intel->gen >= 6) {
      assert(inst->mlen == 0);

      if (inst->opcode == FS_OPCODE_POW) {
         brw_math2(p, dst, op, src[0], src[1]);
      } else {
         brw_math(p, dst,
                  op,
                  inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                  BRW_MATH_SATURATE_NONE,
                  0, src[0],
                  BRW_MATH_DATA_VECTOR,
                  BRW_MATH_PRECISION_FULL);
      }
   } else {
      assert(inst->mlen >= 1);

      brw_math(p, dst,
               op,
               inst->saturate ?
BRW_MATH_SATURATE_SATURATE :
               BRW_MATH_SATURATE_NONE,
               inst->base_mrf, src[0],
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);
   }
}

/* Generate a sampler send: pick the message type (and SIMD mode) for
 * this generation and opcode, then issue brw_SAMPLE with the payload
 * the visitor already built in the MRFs.
 */
void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

   if (intel->gen >= 5) {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
         }
         break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         /* Note that G45 and older determines shadow compare and dispatch width
          * from message length for most messages.
          */
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
         } else {
            assert(inst->mlen <= 4);
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(inst->mlen == 6);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         } else {
            /* Non-shadow bias only exists as a SIMD16 message on gen4;
             * mlen 9 matches the payload emit_texture_gen4 built.
             */
            assert(inst->mlen == 9);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      }
   }
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      /* SIMD16 returns twice the data. */
      rlen = 8;
      dst = vec16(dst);
   }

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              inst->base_mrf,
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(inst->sampler),
              inst->sampler,
              WRITEMASK_XYZW,
              msg_type,
              rlen,
              inst->mlen,
              0,
              1,
              simd_mode);
}


/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *           DDX                     DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But for DDY, it's harder, as we want to produce the pairs swizzled
 * between each other.
We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
/** Emit code for OPCODE_DDX: per-pair horizontal derivative.
 *
 * Per the comment above, src0 reads the right pixel of each pair
 * (subreg offset 1) and src1 the left pixel (offset 0); width=2 with
 * hstride=0 replicates each value across the pair, and vstride=2
 * advances to the next pair.
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/** Emit code for OPCODE_DDY: simplified vertical derivative.
 *
 * Per the comment above, this produces (tl - bl) replicated across each
 * subspan rather than the fully swizzled result: src0 reads the top-left
 * (subreg offset 0), src1 the bottom-left (offset 2), width=4 with
 * hstride=0 replicates across the subspan.
 */
void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/** First half of discard codegen: initialize the not-discarded mask.
 *
 * Pairs with generate_discard_and() below, which folds the mask into
 * the channel enables.
 */
void
fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
{
   if (intel->gen >= 6) {
      /* Gen6 no longer has the mask reg for us to just read the
       * active channels from.  However, cmp updates just the channels
       * of the flag reg that are enabled, so we can get at the
       * channel enables that way.  In this step, make a reg of ones
       * we'll compare to.
       */
      brw_MOV(p, mask, brw_imm_ud(1));
   } else {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
      brw_pop_insn_state(p);
   }
}

/** Second half of discard codegen: AND the surviving-channel mask into
 * the payload's pixel-enable word so discarded channels stop being written.
 */
void
fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
{
   if (intel->gen >= 6) {
      struct brw_reg f0 = brw_flag_reg();
      /* NOTE(review): g1.7<uw> appears to be where the gen6 payload keeps
       * the pixel enables -- confirm against the PRM payload layout.
       */
      struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, f0, brw_imm_uw(0xffff)); /* inactive channels undiscarded */
      brw_pop_insn_state(p);

      brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
              BRW_CONDITIONAL_Z, mask, brw_imm_ud(0)); /* active channels fail test */
      /* Undo CMP's whacking of predication */
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_AND(p, g1, f0, g1);
      brw_pop_insn_state(p);
   } else {
      struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

      mask = brw_uw1_reg(mask.file, mask.nr, 0);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_AND(p, g0, mask, g0);
      brw_pop_insn_state(p);
   }
}

/** Emit a spill of one register to scratch space via an oword block write.
 *
 * The payload is copied to m(base_mrf + 1); base_mrf itself is handed to
 * the helper, which presumably builds the message header there.
 */
void
fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
{
   assert(inst->mlen != 0);

   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
           retype(src, BRW_REGISTER_TYPE_UD));
   brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
                                 inst->offset);
}

/** Emit an unspill (reload) of one register from scratch space. */
void
fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   /* Clear any post destination dependencies that would be ignored by
    * the block read.  See the B-Spec for pre-gen5 send instruction.
    *
    * This could use a better solution, since texture sampling and
    * math reads could potentially run into it as well -- anywhere
    * that we have a SEND with a destination that is a register that
    * was written but not read within the last N instructions (what's
    * N?  unsure).  This is rare because of dead code elimination, but
    * not impossible.
    */
   if (intel->gen == 4 && !intel->is_g4x)
      brw_MOV(p, brw_null_reg(), dst);

   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
                                inst->offset);

   if (intel->gen == 4 && !intel->is_g4x) {
      /* gen4 errata: destination from a send can't be used as a
       * destination until it's been read.  Just read it so we don't
       * have to worry.
       */
      brw_MOV(p, brw_null_reg(), dst);
   }
}


/** Emit a load of a demoted ("pull") constant from the constant buffer
 * surface; see setup_pull_constants() for where these loads come from.
 */
void
fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   /* Clear any post destination dependencies that would be ignored by
    * the block read.  See the B-Spec for pre-gen5 send instruction.
    *
    * This could use a better solution, since texture sampling and
    * math reads could potentially run into it as well -- anywhere
    * that we have a SEND with a destination that is a register that
    * was written but not read within the last N instructions (what's
    * N?  unsure).  This is rare because of dead code elimination, but
    * not impossible.
    */
   if (intel->gen == 4 && !intel->is_g4x)
      brw_MOV(p, brw_null_reg(), dst);

   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
                        inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);

   if (intel->gen == 4 && !intel->is_g4x) {
      /* gen4 errata: destination from a send can't be used as a
       * destination until it's been read.  Just read it so we don't
       * have to worry.
       */
      brw_MOV(p, brw_null_reg(), dst);
   }
}

/** Rewrite UNIFORM-file sources to the fixed hardware registers the
 * push constants (CURB) land in after the payload registers.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->nr_payload_regs;
   /* 8 constant components per register. */
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
            int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
            struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
                                                  constant_nr / 8,
                                                  constant_nr % 8);

            inst->src[i].file = FIXED_HW_REG;
            inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type);
         }
      }
   }
}

/** Decide which URB setup slot each FS input attribute occupies
 * (urb_setup[attr] = slot, or -1 if unused) and record the resulting
 * URB read length.
 */
void
fs_visitor::calculate_urb_setup()
{
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      urb_setup[i] = -1;
   }

   int urb_next = 0;
   /* Figure out where each of the incoming setup attributes lands. */
   if (intel->gen >= 6) {
      for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
         if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) {
            urb_setup[i] = urb_next++;
         }
      }
   } else {
      /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
      for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
         if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
            int fp_index;

            /* Map VERT_RESULT_* to the matching FRAG_ATTRIB_* index;
             * generic varyings are offset, TEX0..TEX7 and earlier map
             * one-to-one, anything in between has no FS input.
             */
            if (i >= VERT_RESULT_VAR0)
               fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
            else if (i <= VERT_RESULT_TEX7)
               fp_index = i;
            else
               fp_index = -1;

            if (fp_index >= 0)
               urb_setup[fp_index] = urb_next++;
         }
      }
   }

   /* Each attribute is 4 setup channels, each of which is half a reg. */
   c->prog_data.urb_read_length = urb_next * 2;
}

/** Rebase the LINTERP/CINTERP fixed-register operands by the actual GRF
 * where URB setup data starts, now that the CURB size is known.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;

   /* Offset all the urb_setup[] index by the actual position of the
    * setup regs, now that the location of the constants has been chosen.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode == FS_OPCODE_LINTERP) {
         assert(inst->src[2].file == FIXED_HW_REG);
         inst->src[2].fixed_hw_reg.nr += urb_start;
      }

      if (inst->opcode == FS_OPCODE_CINTERP) {
         assert(inst->src[0].file == FIXED_HW_REG);
         inst->src[0].fixed_hw_reg.nr += urb_start;
      }
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}

/**
 * Split large virtual GRFs into separate components if we can.
 *
 * This is mostly duplicated with what brw_fs_vector_splitting does,
 * but that's really conservative because it's afraid of doing
 * splitting that doesn't result in real progress after the rest of
 * the optimization phases, which would cause infinite looping in
 * optimization.  We can do it once here, safely.  This also has the
 * opportunity to split interpolated values, or maybe even uniforms,
 * which we don't have at the IR level.
 *
 * We want to split, because virtual GRFs are what we register
 * allocate and spill (due to contiguousness requirements for some
 * instructions), and they're what we naturally generate in the
 * codegen process, but most virtual GRFs don't actually need to be
 * contiguous sets of GRFs.  If we split, we'll end up with reduced
 * live intervals and better dead code elimination and coalescing.
 */
void
fs_visitor::split_virtual_grfs()
{
   int num_vars = this->virtual_grf_next;
   /* VLAs sized by the current virtual GRF count, indexed by reg number. */
   bool split_grf[num_vars];
   int new_virtual_grf[num_vars];

   /* Try to split anything > 0 sized. */
   for (int i = 0; i < num_vars; i++) {
      if (this->virtual_grf_sizes[i] != 1)
         split_grf[i] = true;
      else
         split_grf[i] = false;
   }

   if (brw->has_pln) {
      /* PLN opcodes rely on the delta_xy being contiguous. */
      split_grf[this->delta_x.reg] = false;
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      /* Texturing produces 4 contiguous registers, so no splitting. */
      if ((inst->opcode == FS_OPCODE_TEX ||
           inst->opcode == FS_OPCODE_TXB ||
           inst->opcode == FS_OPCODE_TXL) &&
          inst->dst.file == GRF) {
         split_grf[inst->dst.reg] = false;
      }
   }

   /* Allocate new space for split regs.  Note that the virtual
    * numbers will be contiguous.
    */
   for (int i = 0; i < num_vars; i++) {
      if (split_grf[i]) {
         /* reg_offset 0 stays in the original register i; offsets
          * 1..size-1 map to the size-1 new single-reg GRFs allocated
          * here.  The j loop starts at 2 because the first allocation
          * already covers offset 1 (reg = new + offset - 1).
          */
         new_virtual_grf[i] = virtual_grf_alloc(1);
         for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
            int reg = virtual_grf_alloc(1);
            assert(reg == new_virtual_grf[i] + j - 1);
            (void) reg;
         }
         this->virtual_grf_sizes[i] = 1;
      }
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      /* Rewrite each non-zero reg_offset reference to the corresponding
       * newly allocated single register.
       */
      if (inst->dst.file == GRF &&
          split_grf[inst->dst.reg] &&
          inst->dst.reg_offset != 0) {
         inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
                          inst->dst.reg_offset - 1);
         inst->dst.reg_offset = 0;
      }
      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file == GRF &&
             split_grf[inst->src[i].reg] &&
             inst->src[i].reg_offset != 0) {
            inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
                                inst->src[i].reg_offset - 1);
            inst->src[i].reg_offset = 0;
         }
      }
   }
   this->live_intervals_valid = false;
}

/**
 * Choose accesses from the UNIFORM file to demote to using the pull
 * constant buffer.
 *
 * We allow a fragment shader to have more than the specified minimum
 * maximum number of fragment shader uniform components (64).  If
 * there are too many of these, they'd fill up all of register space.
 * So, this will push some of them out to the pull constant buffer and
 * update the program to load them.
 */
void
fs_visitor::setup_pull_constants()
{
   /* Only allow 16 registers (128 uniform components) as push constants. */
   unsigned int max_uniform_components = 16 * 8;
   if (c->prog_data.nr_params <= max_uniform_components)
      return;

   /* Just demote the end of the list.  We could probably do better
    * here, demoting things that are rarely used in the program first.
    */
   int pull_uniform_base = max_uniform_components;
   int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != UNIFORM)
            continue;

         int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
         if (uniform_nr < pull_uniform_base)
            continue;

         /* Insert a load of the 16-byte-aligned block holding this
          * uniform right before the use, then rewrite the use to read
          * the loaded GRF; smear selects the component within the block.
          */
         fs_reg dst = fs_reg(this, glsl_type::float_type);
         fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD,
                                              dst);
         pull->offset = ((uniform_nr - pull_uniform_base) * 4) & ~15;
         pull->ir = inst->ir;
         pull->annotation = inst->annotation;
         pull->base_mrf = 14;
         pull->mlen = 1;

         inst->insert_before(pull);

         inst->src[i].file = GRF;
         inst->src[i].reg = dst.reg;
         inst->src[i].reg_offset = 0;
         inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3;
      }
   }

   /* Move the demoted entries from param[] over to pull_param[]. */
   for (int i = 0; i < pull_uniform_count; i++) {
      c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i];
      c->prog_data.pull_param_convert[i] =
         c->prog_data.param_convert[pull_uniform_base + i];
   }
   c->prog_data.nr_params -= pull_uniform_count;
   c->prog_data.nr_pull_params = pull_uniform_count;
}

/**
 * Compute, per virtual GRF, the instruction range [def, use] over which
 * it is live, widening conservatively across loops.  Results land in
 * this->virtual_grf_def / this->virtual_grf_use.
 */
void
fs_visitor::calculate_live_intervals()
{
   int num_vars = this->virtual_grf_next;
   int *def = talloc_array(mem_ctx, int, num_vars);
   int *use = talloc_array(mem_ctx, int, num_vars);
   int loop_depth = 0;
   int loop_start = 0;
   int bb_header_ip = 0;

   /* NOTE(review): this early-out sits after the two talloc_array calls
    * above, so a cache hit still allocates scratch (owned by mem_ctx,
    * so it is reclaimed with the context rather than leaked).
    */
   if (this->live_intervals_valid)
      return;

   for (int i = 0; i < num_vars; i++) {
      def[i] = 1 << 30;   /* "not yet defined" sentinel, larger than any ip */
      use[i] = -1;        /* "never used" sentinel */
   }

   int ip = 0;
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode == BRW_OPCODE_DO) {
         /* Only the outermost loop's start matters for the widening below. */
         if (loop_depth++ == 0)
            loop_start = ip;
      } else if (inst->opcode == BRW_OPCODE_WHILE) {
         loop_depth--;

         if (loop_depth == 0) {
            /* Patches up the use of vars marked for being live across
             * the whole loop.
             */
            for (int i = 0; i < num_vars; i++) {
               if (use[i] == loop_start) {
                  use[i] = ip;
               }
            }
         }
      } else {
         for (unsigned int i = 0; i < 3; i++) {
            if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
               int reg = inst->src[i].reg;

               /* Outside a loop, or when a single-reg GRF was fully
                * defined within this basic block, the use point is exact;
                * otherwise widen the interval to cover the whole loop.
                */
               if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 &&
                                   def[reg] >= bb_header_ip)) {
                  use[reg] = ip;
               } else {
                  def[reg] = MIN2(loop_start, def[reg]);
                  use[reg] = loop_start;

                  /* Nobody else is going to go smash our start to
                   * later in the loop now, because def[reg] now
                   * points before the bb header.
                   */
               }
            }
         }
         if (inst->dst.file == GRF && inst->dst.reg != 0) {
            int reg = inst->dst.reg;

            if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 &&
                                !inst->predicated)) {
               def[reg] = MIN2(def[reg], ip);
            } else {
               def[reg] = MIN2(def[reg], loop_start);
            }
         }
      }

      ip++;

      /* Set the basic block header IP.  This is used for determining
       * if a complete def of single-register virtual GRF in a loop
       * dominates a use in the same basic block.  It's a quick way to
       * reduce the live interval range of most register used in a
       * loop.
       */
      if (inst->opcode == BRW_OPCODE_IF ||
          inst->opcode == BRW_OPCODE_ELSE ||
          inst->opcode == BRW_OPCODE_ENDIF ||
          inst->opcode == BRW_OPCODE_DO ||
          inst->opcode == BRW_OPCODE_WHILE ||
          inst->opcode == BRW_OPCODE_BREAK ||
          inst->opcode == BRW_OPCODE_CONTINUE) {
         bb_header_ip = ip;
      }
   }

   talloc_free(this->virtual_grf_def);
   talloc_free(this->virtual_grf_use);
   this->virtual_grf_def = def;
   this->virtual_grf_use = use;

   this->live_intervals_valid = true;
}

/**
 * Attempts to move immediate constants into the immediate
 * constant slot of following instructions.
 *
 * Immediate constants are a bit tricky -- they have to be in the last
 * operand slot, you can't do abs/negate on them,
 */

bool
fs_visitor::propagate_constants()
{
   bool progress = false;

   calculate_live_intervals();

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicated ||
          inst->dst.file != GRF || inst->src[0].file != IMM ||
          inst->dst.type != inst->src[0].type)
         continue;

      /* Don't bother with cases where we should have had the
       * operation on the constant folded in GLSL already.
       */
      if (inst->saturate)
         continue;

      /* Found a move of a constant to a GRF.  Find anything else using the GRF
       * before it's written, and replace it with the constant if we can.
       */
      exec_list_iterator scan_iter = iter;
      scan_iter.next();
      for (; scan_iter.has_next(); scan_iter.next()) {
         fs_inst *scan_inst = (fs_inst *)scan_iter.get();

         /* Stop at control flow -- propagation stays within the block. */
         if (scan_inst->opcode == BRW_OPCODE_DO ||
             scan_inst->opcode == BRW_OPCODE_WHILE ||
             scan_inst->opcode == BRW_OPCODE_ELSE ||
             scan_inst->opcode == BRW_OPCODE_ENDIF) {
            break;
         }

         for (int i = 2; i >= 0; i--) {
            if (scan_inst->src[i].file != GRF ||
                scan_inst->src[i].reg != inst->dst.reg ||
                scan_inst->src[i].reg_offset != inst->dst.reg_offset)
               continue;

            /* Don't bother with cases where we should have had the
             * operation on the constant folded in GLSL already.
             */
            if (scan_inst->src[i].negate || scan_inst->src[i].abs)
               continue;

            switch (scan_inst->opcode) {
            case BRW_OPCODE_MOV:
               scan_inst->src[i] = inst->src[0];
               progress = true;
               break;

            case BRW_OPCODE_MUL:
            case BRW_OPCODE_ADD:
               if (i == 1) {
                  scan_inst->src[i] = inst->src[0];
                  progress = true;
               } else if (i == 0 && scan_inst->src[1].file != IMM) {
                  /* Fit this constant in by commuting the operands */
                  scan_inst->src[0] = scan_inst->src[1];
                  scan_inst->src[1] = inst->src[0];
                  progress = true;
               }
               break;
            case BRW_OPCODE_CMP:
            case BRW_OPCODE_SEL:
               /* Non-commutative: only the second operand may be immediate. */
               if (i == 1) {
                  scan_inst->src[i] = inst->src[0];
                  progress = true;
               }
            }
         }

         /* Stop once the GRF is overwritten.  TEX writes four contiguous
          * regs, so any offset of a TEX destination counts as a clobber.
          */
         if (scan_inst->dst.file == GRF &&
             scan_inst->dst.reg == inst->dst.reg &&
             (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
              scan_inst->opcode == FS_OPCODE_TEX)) {
            break;
         }
      }
   }

   if (progress)
      this->live_intervals_valid = false;

   return progress;
}
/**
 * Must be called after calculate_live_intervals() to remove unused
 * writes to registers -- register allocation will fail otherwise
 * because something deffed but not used won't be
considered to
 * interfere with other regs.
 */
bool
fs_visitor::dead_code_eliminate()
{
   bool progress = false;
   int pc = 0;

   calculate_live_intervals();

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      /* If the GRF's last use is at or before this instruction's own
       * position, nothing ever reads the value written here.
       */
      if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
         inst->remove();
         progress = true;
      }

      pc++;
   }

   if (progress)
      live_intervals_valid = false;

   return progress;
}

/**
 * Eliminate GRF-to-GRF MOVs by pointing later readers of the MOV's
 * destination at its source, when neither register is rewritten later.
 */
bool
fs_visitor::register_coalesce()
{
   bool progress = false;
   int if_depth = 0;
   int loop_depth = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      /* Make sure that we dominate the instructions we're going to
       * scan for interfering with our coalescing, or we won't have
       * scanned enough to see if anything interferes with our
       * coalescing.  We don't dominate the following instructions if
       * we're in a loop or an if block.
       */
      switch (inst->opcode) {
      case BRW_OPCODE_DO:
         loop_depth++;
         break;
      case BRW_OPCODE_WHILE:
         loop_depth--;
         break;
      case BRW_OPCODE_IF:
         if_depth++;
         break;
      case BRW_OPCODE_ENDIF:
         if_depth--;
         break;
      }
      if (loop_depth || if_depth)
         continue;

      if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicated ||
          inst->saturate ||
          inst->dst.file != GRF || inst->src[0].file != GRF ||
          inst->dst.type != inst->src[0].type)
         continue;

      /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
       * them: check for no writes to either one until the exit of the
       * program.
       */
      bool interfered = false;
      exec_list_iterator scan_iter = iter;
      scan_iter.next();
      for (; scan_iter.has_next(); scan_iter.next()) {
         fs_inst *scan_inst = (fs_inst *)scan_iter.get();

         if (scan_inst->dst.file == GRF) {
            /* TEX writes four contiguous regs, so any offset of a TEX
             * destination counts as a write to either register.
             */
            if (scan_inst->dst.reg == inst->dst.reg &&
                (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
                 scan_inst->opcode == FS_OPCODE_TEX)) {
               interfered = true;
               break;
            }
            if (scan_inst->dst.reg == inst->src[0].reg &&
                (scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
                 scan_inst->opcode == FS_OPCODE_TEX)) {
               interfered = true;
               break;
            }
         }
      }
      if (interfered) {
         continue;
      }

      /* Rewrite the later usage to point at the source of the move to
       * be removed.
       */
      for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
           scan_iter.next()) {
         fs_inst *scan_inst = (fs_inst *)scan_iter.get();

         for (int i = 0; i < 3; i++) {
            if (scan_inst->src[i].file == GRF &&
                scan_inst->src[i].reg == inst->dst.reg &&
                scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
               scan_inst->src[i].reg = inst->src[0].reg;
               scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
               scan_inst->src[i].abs |= inst->src[0].abs;
               scan_inst->src[i].negate ^= inst->src[0].negate;
               scan_inst->src[i].smear = inst->src[0].smear;
            }
         }
      }

      inst->remove();
      progress = true;
   }

   if (progress)
      live_intervals_valid = false;

   return progress;
}


/**
 * Turn a MOV from a GRF into an MRF into having the instruction that
 * computed the GRF write the MRF directly, when it is safe to do so.
 */
bool
fs_visitor::compute_to_mrf()
{
   bool progress = false;
   int next_ip = 0;

   calculate_live_intervals();

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      int ip = next_ip;
      next_ip++;

      if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicated ||
          inst->dst.file != MRF || inst->src[0].file != GRF ||
          inst->dst.type != inst->src[0].type ||
          inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1)
         continue;

      /* Can't compute-to-MRF this GRF if someone else was going to
       * read it later.
       */
      if (this->virtual_grf_use[inst->src[0].reg] > ip)
         continue;

      /* Found a move of a GRF to a MRF.  Let's see if we can go
       * rewrite the thing that made this GRF to write into the MRF.
       */
      fs_inst *scan_inst;
      /* Scan backward from the MOV toward the head of the list. */
      for (scan_inst = (fs_inst *)inst->prev;
           scan_inst->prev != NULL;
           scan_inst = (fs_inst *)scan_inst->prev) {
         if (scan_inst->dst.file == GRF &&
             scan_inst->dst.reg == inst->src[0].reg) {
            /* Found the last thing to write our reg we want to turn
             * into a compute-to-MRF.
             */

            if (scan_inst->opcode == FS_OPCODE_TEX) {
               /* texturing writes several continuous regs, so we can't
                * compute-to-mrf that.
                */
               break;
            }

            /* If it's predicated, it (probably) didn't populate all
             * the channels.
             */
            if (scan_inst->predicated)
               break;

            /* SEND instructions can't have MRF as a destination. */
            if (scan_inst->mlen)
               break;

            if (intel->gen >= 6) {
               /* gen6 math instructions must have the destination be
                * GRF, so no compute-to-MRF for them.
                */
               if (scan_inst->opcode == FS_OPCODE_RCP ||
                   scan_inst->opcode == FS_OPCODE_RSQ ||
                   scan_inst->opcode == FS_OPCODE_SQRT ||
                   scan_inst->opcode == FS_OPCODE_EXP2 ||
                   scan_inst->opcode == FS_OPCODE_LOG2 ||
                   scan_inst->opcode == FS_OPCODE_SIN ||
                   scan_inst->opcode == FS_OPCODE_COS ||
                   scan_inst->opcode == FS_OPCODE_POW) {
                  break;
               }
            }

            if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
               /* Found the creator of our MRF's source value. */
               scan_inst->dst.file = MRF;
               scan_inst->dst.hw_reg = inst->dst.hw_reg;
               scan_inst->saturate |= inst->saturate;
               inst->remove();
               progress = true;
            }
            break;
         }

         /* We don't handle flow control here.  Most computation of
          * values that end up in MRFs are shortly before the MRF
          * write anyway.
          */
         if (scan_inst->opcode == BRW_OPCODE_DO ||
             scan_inst->opcode == BRW_OPCODE_WHILE ||
             scan_inst->opcode == BRW_OPCODE_ENDIF) {
            break;
         }

         /* You can't read from an MRF, so if someone else reads our
          * MRF's source GRF that we wanted to rewrite, that stops us.
          */
         bool interfered = false;
         for (int i = 0; i < 3; i++) {
            if (scan_inst->src[i].file == GRF &&
                scan_inst->src[i].reg == inst->src[0].reg &&
                scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
               interfered = true;
            }
         }
         if (interfered)
            break;

         if (scan_inst->dst.file == MRF &&
             scan_inst->dst.hw_reg == inst->dst.hw_reg) {
            /* Somebody else wrote our MRF here, so we can't
             * compute-to-MRF before that.
             */
            break;
         }

         if (scan_inst->mlen > 0) {
            /* Found a SEND instruction, which means that there are
             * live values in MRFs from base_mrf to base_mrf +
             * scan_inst->mlen - 1.  Don't go pushing our MRF write up
             * above it.
             */
            if (inst->dst.hw_reg >= scan_inst->base_mrf &&
                inst->dst.hw_reg < scan_inst->base_mrf + scan_inst->mlen) {
               break;
            }
         }
      }
   }

   return progress;
}

/**
 * Walks through basic blocks, looking for repeated MRF writes and
 * removing the later ones.
3200 */ 3201bool 3202fs_visitor::remove_duplicate_mrf_writes() 3203{ 3204 fs_inst *last_mrf_move[16]; 3205 bool progress = false; 3206 3207 memset(last_mrf_move, 0, sizeof(last_mrf_move)); 3208 3209 foreach_iter(exec_list_iterator, iter, this->instructions) { 3210 fs_inst *inst = (fs_inst *)iter.get(); 3211 3212 switch (inst->opcode) { 3213 case BRW_OPCODE_DO: 3214 case BRW_OPCODE_WHILE: 3215 case BRW_OPCODE_IF: 3216 case BRW_OPCODE_ELSE: 3217 case BRW_OPCODE_ENDIF: 3218 memset(last_mrf_move, 0, sizeof(last_mrf_move)); 3219 continue; 3220 default: 3221 break; 3222 } 3223 3224 if (inst->opcode == BRW_OPCODE_MOV && 3225 inst->dst.file == MRF) { 3226 fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; 3227 if (prev_inst && inst->equals(prev_inst)) { 3228 inst->remove(); 3229 progress = true; 3230 continue; 3231 } 3232 } 3233 3234 /* Clear out the last-write records for MRFs that were overwritten. */ 3235 if (inst->dst.file == MRF) { 3236 last_mrf_move[inst->dst.hw_reg] = NULL; 3237 } 3238 3239 if (inst->mlen > 0) { 3240 /* Found a SEND instruction, which will include two of fewer 3241 * implied MRF writes. We could do better here. 3242 */ 3243 for (int i = 0; i < implied_mrf_writes(inst); i++) { 3244 last_mrf_move[inst->base_mrf + i] = NULL; 3245 } 3246 } 3247 3248 /* Clear out any MRF move records whose sources got overwritten. 
*/ 3249 if (inst->dst.file == GRF) { 3250 for (unsigned int i = 0; i < Elements(last_mrf_move); i++) { 3251 if (last_mrf_move[i] && 3252 last_mrf_move[i]->src[0].reg == inst->dst.reg) { 3253 last_mrf_move[i] = NULL; 3254 } 3255 } 3256 } 3257 3258 if (inst->opcode == BRW_OPCODE_MOV && 3259 inst->dst.file == MRF && 3260 inst->src[0].file == GRF && 3261 !inst->predicated) { 3262 last_mrf_move[inst->dst.hw_reg] = inst; 3263 } 3264 } 3265 3266 return progress; 3267} 3268 3269bool 3270fs_visitor::virtual_grf_interferes(int a, int b) 3271{ 3272 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); 3273 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); 3274 3275 /* For dead code, just check if the def interferes with the other range. */ 3276 if (this->virtual_grf_use[a] == -1) { 3277 return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] && 3278 this->virtual_grf_def[a] < this->virtual_grf_use[b]); 3279 } 3280 if (this->virtual_grf_use[b] == -1) { 3281 return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] && 3282 this->virtual_grf_def[b] < this->virtual_grf_use[a]); 3283 } 3284 3285 return start < end; 3286} 3287 3288static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) 3289{ 3290 struct brw_reg brw_reg; 3291 3292 switch (reg->file) { 3293 case GRF: 3294 case ARF: 3295 case MRF: 3296 if (reg->smear == -1) { 3297 brw_reg = brw_vec8_reg(reg->file, 3298 reg->hw_reg, 0); 3299 } else { 3300 brw_reg = brw_vec1_reg(reg->file, 3301 reg->hw_reg, reg->smear); 3302 } 3303 brw_reg = retype(brw_reg, reg->type); 3304 break; 3305 case IMM: 3306 switch (reg->type) { 3307 case BRW_REGISTER_TYPE_F: 3308 brw_reg = brw_imm_f(reg->imm.f); 3309 break; 3310 case BRW_REGISTER_TYPE_D: 3311 brw_reg = brw_imm_d(reg->imm.i); 3312 break; 3313 case BRW_REGISTER_TYPE_UD: 3314 brw_reg = brw_imm_ud(reg->imm.u); 3315 break; 3316 default: 3317 assert(!"not reached"); 3318 brw_reg = brw_null_reg(); 3319 break; 3320 } 3321 break; 3322 case FIXED_HW_REG: 3323 
brw_reg = reg->fixed_hw_reg; 3324 break; 3325 case BAD_FILE: 3326 /* Probably unused. */ 3327 brw_reg = brw_null_reg(); 3328 break; 3329 case UNIFORM: 3330 assert(!"not reached"); 3331 brw_reg = brw_null_reg(); 3332 break; 3333 default: 3334 assert(!"not reached"); 3335 brw_reg = brw_null_reg(); 3336 break; 3337 } 3338 if (reg->abs) 3339 brw_reg = brw_abs(brw_reg); 3340 if (reg->negate) 3341 brw_reg = negate(brw_reg); 3342 3343 return brw_reg; 3344} 3345 3346void 3347fs_visitor::generate_code() 3348{ 3349 int last_native_inst = 0; 3350 struct brw_instruction *if_stack[16], *loop_stack[16]; 3351 int if_stack_depth = 0, loop_stack_depth = 0; 3352 int if_depth_in_loop[16]; 3353 const char *last_annotation_string = NULL; 3354 ir_instruction *last_annotation_ir = NULL; 3355 3356 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3357 printf("Native code for fragment shader %d:\n", 3358 ctx->Shader.CurrentFragmentProgram->Name); 3359 } 3360 3361 if_depth_in_loop[loop_stack_depth] = 0; 3362 3363 memset(&if_stack, 0, sizeof(if_stack)); 3364 foreach_iter(exec_list_iterator, iter, this->instructions) { 3365 fs_inst *inst = (fs_inst *)iter.get(); 3366 struct brw_reg src[3], dst; 3367 3368 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3369 if (last_annotation_ir != inst->ir) { 3370 last_annotation_ir = inst->ir; 3371 if (last_annotation_ir) { 3372 printf(" "); 3373 last_annotation_ir->print(); 3374 printf("\n"); 3375 } 3376 } 3377 if (last_annotation_string != inst->annotation) { 3378 last_annotation_string = inst->annotation; 3379 if (last_annotation_string) 3380 printf(" %s\n", last_annotation_string); 3381 } 3382 } 3383 3384 for (unsigned int i = 0; i < 3; i++) { 3385 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 3386 } 3387 dst = brw_reg_from_fs_reg(&inst->dst); 3388 3389 brw_set_conditionalmod(p, inst->conditional_mod); 3390 brw_set_predicate_control(p, inst->predicated); 3391 brw_set_saturate(p, inst->saturate); 3392 3393 switch (inst->opcode) { 3394 case BRW_OPCODE_MOV: 3395 brw_MOV(p, 
dst, src[0]); 3396 break; 3397 case BRW_OPCODE_ADD: 3398 brw_ADD(p, dst, src[0], src[1]); 3399 break; 3400 case BRW_OPCODE_MUL: 3401 brw_MUL(p, dst, src[0], src[1]); 3402 break; 3403 3404 case BRW_OPCODE_FRC: 3405 brw_FRC(p, dst, src[0]); 3406 break; 3407 case BRW_OPCODE_RNDD: 3408 brw_RNDD(p, dst, src[0]); 3409 break; 3410 case BRW_OPCODE_RNDE: 3411 brw_RNDE(p, dst, src[0]); 3412 break; 3413 case BRW_OPCODE_RNDZ: 3414 brw_RNDZ(p, dst, src[0]); 3415 break; 3416 3417 case BRW_OPCODE_AND: 3418 brw_AND(p, dst, src[0], src[1]); 3419 break; 3420 case BRW_OPCODE_OR: 3421 brw_OR(p, dst, src[0], src[1]); 3422 break; 3423 case BRW_OPCODE_XOR: 3424 brw_XOR(p, dst, src[0], src[1]); 3425 break; 3426 case BRW_OPCODE_NOT: 3427 brw_NOT(p, dst, src[0]); 3428 break; 3429 case BRW_OPCODE_ASR: 3430 brw_ASR(p, dst, src[0], src[1]); 3431 break; 3432 case BRW_OPCODE_SHR: 3433 brw_SHR(p, dst, src[0], src[1]); 3434 break; 3435 case BRW_OPCODE_SHL: 3436 brw_SHL(p, dst, src[0], src[1]); 3437 break; 3438 3439 case BRW_OPCODE_CMP: 3440 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 3441 break; 3442 case BRW_OPCODE_SEL: 3443 brw_SEL(p, dst, src[0], src[1]); 3444 break; 3445 3446 case BRW_OPCODE_IF: 3447 assert(if_stack_depth < 16); 3448 if (inst->src[0].file != BAD_FILE) { 3449 assert(intel->gen >= 6); 3450 if_stack[if_stack_depth] = brw_IF_gen6(p, inst->conditional_mod, src[0], src[1]); 3451 } else { 3452 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 3453 } 3454 if_depth_in_loop[loop_stack_depth]++; 3455 if_stack_depth++; 3456 break; 3457 3458 case BRW_OPCODE_ELSE: 3459 if_stack[if_stack_depth - 1] = 3460 brw_ELSE(p, if_stack[if_stack_depth - 1]); 3461 break; 3462 case BRW_OPCODE_ENDIF: 3463 if_stack_depth--; 3464 brw_ENDIF(p , if_stack[if_stack_depth]); 3465 if_depth_in_loop[loop_stack_depth]--; 3466 break; 3467 3468 case BRW_OPCODE_DO: 3469 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 3470 if_depth_in_loop[loop_stack_depth] = 0; 3471 break; 3472 3473 case 
BRW_OPCODE_BREAK: 3474 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 3475 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 3476 break; 3477 case BRW_OPCODE_CONTINUE: 3478 /* FINISHME: We need to write the loop instruction support still. */ 3479 if (intel->gen >= 6) 3480 brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]); 3481 else 3482 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 3483 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 3484 break; 3485 3486 case BRW_OPCODE_WHILE: { 3487 struct brw_instruction *inst0, *inst1; 3488 GLuint br = 1; 3489 3490 if (intel->gen >= 5) 3491 br = 2; 3492 3493 assert(loop_stack_depth > 0); 3494 loop_stack_depth--; 3495 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 3496 if (intel->gen < 6) { 3497 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 3498 while (inst0 > loop_stack[loop_stack_depth]) { 3499 inst0--; 3500 if (inst0->header.opcode == BRW_OPCODE_BREAK && 3501 inst0->bits3.if_else.jump_count == 0) { 3502 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 3503 } 3504 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 3505 inst0->bits3.if_else.jump_count == 0) { 3506 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 3507 } 3508 } 3509 } 3510 } 3511 break; 3512 3513 case FS_OPCODE_RCP: 3514 case FS_OPCODE_RSQ: 3515 case FS_OPCODE_SQRT: 3516 case FS_OPCODE_EXP2: 3517 case FS_OPCODE_LOG2: 3518 case FS_OPCODE_POW: 3519 case FS_OPCODE_SIN: 3520 case FS_OPCODE_COS: 3521 generate_math(inst, dst, src); 3522 break; 3523 case FS_OPCODE_CINTERP: 3524 brw_MOV(p, dst, src[0]); 3525 break; 3526 case FS_OPCODE_LINTERP: 3527 generate_linterp(inst, dst, src); 3528 break; 3529 case FS_OPCODE_TEX: 3530 case FS_OPCODE_TXB: 3531 case FS_OPCODE_TXL: 3532 generate_tex(inst, dst); 3533 break; 3534 case FS_OPCODE_DISCARD_NOT: 3535 generate_discard_not(inst, dst); 3536 break; 3537 case FS_OPCODE_DISCARD_AND: 3538 generate_discard_and(inst, src[0]); 3539 break; 3540 case FS_OPCODE_DDX: 3541 
generate_ddx(inst, dst, src[0]); 3542 break; 3543 case FS_OPCODE_DDY: 3544 generate_ddy(inst, dst, src[0]); 3545 break; 3546 3547 case FS_OPCODE_SPILL: 3548 generate_spill(inst, src[0]); 3549 break; 3550 3551 case FS_OPCODE_UNSPILL: 3552 generate_unspill(inst, dst); 3553 break; 3554 3555 case FS_OPCODE_PULL_CONSTANT_LOAD: 3556 generate_pull_constant_load(inst, dst); 3557 break; 3558 3559 case FS_OPCODE_FB_WRITE: 3560 generate_fb_write(inst); 3561 break; 3562 default: 3563 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 3564 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 3565 brw_opcodes[inst->opcode].name); 3566 } else { 3567 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 3568 } 3569 this->fail = true; 3570 } 3571 3572 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3573 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 3574 if (0) { 3575 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 3576 ((uint32_t *)&p->store[i])[3], 3577 ((uint32_t *)&p->store[i])[2], 3578 ((uint32_t *)&p->store[i])[1], 3579 ((uint32_t *)&p->store[i])[0]); 3580 } 3581 brw_disasm(stdout, &p->store[i], intel->gen); 3582 } 3583 } 3584 3585 last_native_inst = p->nr_insn; 3586 } 3587 3588 brw_set_uip_jip(p); 3589 3590 /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS 3591 * emit issues, it doesn't get the jump distances into the output, 3592 * which is often something we want to debug. So this is here in 3593 * case you're doing that. 
 */
   if (0) {
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
         for (unsigned int i = 0; i < p->nr_insn; i++) {
            printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                   ((uint32_t *)&p->store[i])[3],
                   ((uint32_t *)&p->store[i])[2],
                   ((uint32_t *)&p->store[i])[1],
                   ((uint32_t *)&p->store[i])[0]);
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }
   }
}

/**
 * Top-level entry point for compiling the current GLSL fragment program
 * to native i965 code via the fs_visitor backend.
 *
 * Runs the full pipeline: IR visit, FB-write emission, constant/URB/CURB
 * setup, an optimization loop run to a fixed point, register allocation
 * (with retries, which presumably trigger spilling -- see assign_regs),
 * and finally native code generation.
 *
 * Returns GL_TRUE on success; GL_FALSE when there is no linked GLSL
 * fragment shader (letting the caller fall back) or when the visitor
 * flagged a failure.
 */
GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;

   /* No GLSL program bound: nothing for this backend to do. */
   if (!prog)
      return GL_FALSE;

   struct brw_shader *shader =
     (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
   if (!shader)
      return GL_FALSE;

   /* We always use 8-wide mode, at least for now. For one, flow
    * control only works in 8-wide. Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      /* Debug path: emit a trivial shader instead of the real one. */
      v.emit_dummy_fs();
   } else {
      v.calculate_urb_setup();
      if (intel->gen < 6)
         v.emit_interpolation_setup_gen4();
      else
         v.emit_interpolation_setup_gen6();

      /* Generate FS IR for main(). (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         v.base_ir = ir;
         ir->accept(&v);
      }

      v.emit_fb_writes();

      /* Lower virtual GRFs and move overflow constants to pull buffers
       * before laying out the payload registers.
       */
      v.split_virtual_grfs();
      v.setup_pull_constants();

      v.assign_curb_setup();
      v.assign_urb_setup();

      /* Run the IR-level optimization passes to a fixed point: each
       * pass reports whether it made progress, and any progress can
       * expose more work for the others.
       */
      bool progress;
      do {
         progress = false;

         progress = v.remove_duplicate_mrf_writes() || progress;

         progress = v.propagate_constants() || progress;
         progress = v.register_coalesce() || progress;
         progress = v.compute_to_mrf() || progress;
         progress = v.dead_code_eliminate() || progress;
      } while (progress);

      if (0) {
         /* Debug of register spilling: Go spill everything. */
         int virtual_grf_count = v.virtual_grf_next;
         for (int i = 1; i < virtual_grf_count; i++) {
            v.spill_reg(i);
         }
      }

      if (0)
         v.assign_regs_trivial();
      else {
         /* Retry allocation until it succeeds or the visitor gives up. */
         while (!v.assign_regs()) {
            if (v.fail)
               break;
         }
      }
   }

   if (!v.fail)
      v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   /* Tell the state tracker how many hardware GRFs the shader needs. */
   c->prog_data.total_grf = v.grf_used;

   return GL_TRUE;
}