// brw_fs.cpp revision df4d83dca4618eb7077637865763d3e9ab750d11
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44#include "talloc.h" 45} 46#include "brw_fs.h" 47#include "../glsl/glsl_types.h" 48#include "../glsl/ir_optimization.h" 49#include "../glsl/ir_print_visitor.h" 50 51static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 52 53struct gl_shader * 54brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 55{ 56 struct brw_shader *shader; 57 58 shader = talloc_zero(NULL, struct brw_shader); 59 if (shader) { 60 shader->base.Type = type; 61 shader->base.Name = name; 62 _mesa_init_shader(ctx, &shader->base); 63 } 64 65 return &shader->base; 66} 67 68struct gl_shader_program * 69brw_new_shader_program(struct gl_context *ctx, GLuint name) 70{ 71 struct brw_shader_program *prog; 72 prog = talloc_zero(NULL, struct brw_shader_program); 73 if (prog) { 74 prog->base.Name = name; 75 _mesa_init_shader_program(ctx, &prog->base); 76 } 77 return &prog->base; 78} 79 80GLboolean 81brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) 82{ 83 if (!_mesa_ir_compile_shader(ctx, shader)) 84 return GL_FALSE; 85 86 return GL_TRUE; 87} 88 89GLboolean 90brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 91{ 92 struct brw_context *brw = brw_context(ctx); 93 struct intel_context *intel = &brw->intel; 94 95 struct brw_shader *shader = 96 (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 97 if (shader != NULL) { 98 void *mem_ctx = talloc_new(NULL); 99 bool progress; 100 101 if (shader->ir) 102 talloc_free(shader->ir); 103 shader->ir = new(shader) 
exec_list; 104 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 105 106 do_mat_op_to_vec(shader->ir); 107 lower_instructions(shader->ir, 108 MOD_TO_FRACT | 109 DIV_TO_MUL_RCP | 110 SUB_TO_ADD_NEG | 111 EXP_TO_EXP2 | 112 LOG_TO_LOG2); 113 114 /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, 115 * if-statements need to be flattened. 116 */ 117 if (intel->gen < 6) 118 lower_if_to_cond_assign(shader->ir, 16); 119 120 do_lower_texture_projection(shader->ir); 121 do_vec_index_to_cond_assign(shader->ir); 122 brw_do_cubemap_normalize(shader->ir); 123 124 do { 125 progress = false; 126 127 brw_do_channel_expressions(shader->ir); 128 brw_do_vector_splitting(shader->ir); 129 130 progress = do_lower_jumps(shader->ir, true, true, 131 true, /* main return */ 132 false, /* continue */ 133 false /* loops */ 134 ) || progress; 135 136 progress = do_common_optimization(shader->ir, true, 32) || progress; 137 138 progress = lower_noise(shader->ir) || progress; 139 progress = 140 lower_variable_index_to_cond_assign(shader->ir, 141 GL_TRUE, /* input */ 142 GL_TRUE, /* output */ 143 GL_TRUE, /* temp */ 144 GL_TRUE /* uniform */ 145 ) || progress; 146 progress = lower_quadop_vector(shader->ir, false) || progress; 147 } while (progress); 148 149 validate_ir_tree(shader->ir); 150 151 reparent_ir(shader->ir, shader->ir); 152 talloc_free(mem_ctx); 153 } 154 155 if (!_mesa_ir_link_shader(ctx, prog)) 156 return GL_FALSE; 157 158 return GL_TRUE; 159} 160 161static int 162type_size(const struct glsl_type *type) 163{ 164 unsigned int size, i; 165 166 switch (type->base_type) { 167 case GLSL_TYPE_UINT: 168 case GLSL_TYPE_INT: 169 case GLSL_TYPE_FLOAT: 170 case GLSL_TYPE_BOOL: 171 return type->components(); 172 case GLSL_TYPE_ARRAY: 173 return type_size(type->fields.array) * type->length; 174 case GLSL_TYPE_STRUCT: 175 size = 0; 176 for (i = 0; i < type->length; i++) { 177 size += type_size(type->fields.structure[i].type); 178 } 179 return size; 180 case GLSL_TYPE_SAMPLER: 181 /* 
Samplers take up no register space, since they're baked in at 182 * link time. 183 */ 184 return 0; 185 default: 186 assert(!"not reached"); 187 return 0; 188 } 189} 190 191/** 192 * Returns how many MRFs an FS opcode will write over. 193 * 194 * Note that this is not the 0 or 1 implied writes in an actual gen 195 * instruction -- the FS opcodes often generate MOVs in addition. 196 */ 197int 198fs_visitor::implied_mrf_writes(fs_inst *inst) 199{ 200 if (inst->mlen == 0) 201 return 0; 202 203 switch (inst->opcode) { 204 case FS_OPCODE_RCP: 205 case FS_OPCODE_RSQ: 206 case FS_OPCODE_SQRT: 207 case FS_OPCODE_EXP2: 208 case FS_OPCODE_LOG2: 209 case FS_OPCODE_SIN: 210 case FS_OPCODE_COS: 211 return 1; 212 case FS_OPCODE_POW: 213 return 2; 214 case FS_OPCODE_TEX: 215 case FS_OPCODE_TXB: 216 case FS_OPCODE_TXL: 217 return 1; 218 case FS_OPCODE_FB_WRITE: 219 return 2; 220 case FS_OPCODE_PULL_CONSTANT_LOAD: 221 case FS_OPCODE_UNSPILL: 222 return 1; 223 case FS_OPCODE_SPILL: 224 return 2; 225 default: 226 assert(!"not reached"); 227 return inst->mlen; 228 } 229} 230 231int 232fs_visitor::virtual_grf_alloc(int size) 233{ 234 if (virtual_grf_array_size <= virtual_grf_next) { 235 if (virtual_grf_array_size == 0) 236 virtual_grf_array_size = 16; 237 else 238 virtual_grf_array_size *= 2; 239 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, 240 int, virtual_grf_array_size); 241 242 /* This slot is always unused. */ 243 virtual_grf_sizes[0] = 0; 244 } 245 virtual_grf_sizes[virtual_grf_next] = size; 246 return virtual_grf_next++; 247} 248 249/** Fixed HW reg constructor. */ 250fs_reg::fs_reg(enum register_file file, int hw_reg) 251{ 252 init(); 253 this->file = file; 254 this->hw_reg = hw_reg; 255 this->type = BRW_REGISTER_TYPE_F; 256} 257 258/** Fixed HW reg constructor. 
*/ 259fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) 260{ 261 init(); 262 this->file = file; 263 this->hw_reg = hw_reg; 264 this->type = type; 265} 266 267int 268brw_type_for_base_type(const struct glsl_type *type) 269{ 270 switch (type->base_type) { 271 case GLSL_TYPE_FLOAT: 272 return BRW_REGISTER_TYPE_F; 273 case GLSL_TYPE_INT: 274 case GLSL_TYPE_BOOL: 275 return BRW_REGISTER_TYPE_D; 276 case GLSL_TYPE_UINT: 277 return BRW_REGISTER_TYPE_UD; 278 case GLSL_TYPE_ARRAY: 279 case GLSL_TYPE_STRUCT: 280 case GLSL_TYPE_SAMPLER: 281 /* These should be overridden with the type of the member when 282 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 283 * way to trip up if we don't. 284 */ 285 return BRW_REGISTER_TYPE_UD; 286 default: 287 assert(!"not reached"); 288 return BRW_REGISTER_TYPE_F; 289 } 290} 291 292/** Automatic reg constructor. */ 293fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 294{ 295 init(); 296 297 this->file = GRF; 298 this->reg = v->virtual_grf_alloc(type_size(type)); 299 this->reg_offset = 0; 300 this->type = brw_type_for_base_type(type); 301} 302 303fs_reg * 304fs_visitor::variable_storage(ir_variable *var) 305{ 306 return (fs_reg *)hash_table_find(this->variable_ht, var); 307} 308 309/* Our support for uniforms is piggy-backed on the struct 310 * gl_fragment_program, because that's where the values actually 311 * get stored, rather than in some global gl_shader_program uniform 312 * store. 
313 */ 314int 315fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 316{ 317 unsigned int offset = 0; 318 float *vec_values; 319 320 if (type->is_matrix()) { 321 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 322 type->vector_elements, 323 1); 324 325 for (unsigned int i = 0; i < type->matrix_columns; i++) { 326 offset += setup_uniform_values(loc + offset, column); 327 } 328 329 return offset; 330 } 331 332 switch (type->base_type) { 333 case GLSL_TYPE_FLOAT: 334 case GLSL_TYPE_UINT: 335 case GLSL_TYPE_INT: 336 case GLSL_TYPE_BOOL: 337 vec_values = fp->Base.Parameters->ParameterValues[loc]; 338 for (unsigned int i = 0; i < type->vector_elements; i++) { 339 unsigned int param = c->prog_data.nr_params++; 340 341 assert(param < ARRAY_SIZE(c->prog_data.param)); 342 343 switch (type->base_type) { 344 case GLSL_TYPE_FLOAT: 345 c->prog_data.param_convert[param] = PARAM_NO_CONVERT; 346 break; 347 case GLSL_TYPE_UINT: 348 c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; 349 break; 350 case GLSL_TYPE_INT: 351 c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; 352 break; 353 case GLSL_TYPE_BOOL: 354 c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; 355 break; 356 default: 357 assert(!"not reached"); 358 c->prog_data.param_convert[param] = PARAM_NO_CONVERT; 359 break; 360 } 361 362 c->prog_data.param[param] = &vec_values[i]; 363 } 364 return 1; 365 366 case GLSL_TYPE_STRUCT: 367 for (unsigned int i = 0; i < type->length; i++) { 368 offset += setup_uniform_values(loc + offset, 369 type->fields.structure[i].type); 370 } 371 return offset; 372 373 case GLSL_TYPE_ARRAY: 374 for (unsigned int i = 0; i < type->length; i++) { 375 offset += setup_uniform_values(loc + offset, type->fields.array); 376 } 377 return offset; 378 379 case GLSL_TYPE_SAMPLER: 380 /* The sampler takes up a slot, but we don't use any values from it. 
*/ 381 return 1; 382 383 default: 384 assert(!"not reached"); 385 return 0; 386 } 387} 388 389 390/* Our support for builtin uniforms is even scarier than non-builtin. 391 * It sits on top of the PROG_STATE_VAR parameters that are 392 * automatically updated from GL context state. 393 */ 394void 395fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 396{ 397 const struct gl_builtin_uniform_desc *statevar = NULL; 398 399 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 400 statevar = &_mesa_builtin_uniform_desc[i]; 401 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 402 break; 403 } 404 405 if (!statevar->name) { 406 this->fail = true; 407 printf("Failed to find builtin uniform `%s'\n", ir->name); 408 return; 409 } 410 411 int array_count; 412 if (ir->type->is_array()) { 413 array_count = ir->type->length; 414 } else { 415 array_count = 1; 416 } 417 418 for (int a = 0; a < array_count; a++) { 419 for (unsigned int i = 0; i < statevar->num_elements; i++) { 420 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 421 int tokens[STATE_LENGTH]; 422 423 memcpy(tokens, element->tokens, sizeof(element->tokens)); 424 if (ir->type->is_array()) { 425 tokens[1] = a; 426 } 427 428 /* This state reference has already been setup by ir_to_mesa, 429 * but we'll get the same index back here. 430 */ 431 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 432 (gl_state_index *)tokens); 433 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 434 435 /* Add each of the unique swizzles of the element as a 436 * parameter. This'll end up matching the expected layout of 437 * the array/matrix/structure we're trying to fill in. 
438 */ 439 int last_swiz = -1; 440 for (unsigned int i = 0; i < 4; i++) { 441 int swiz = GET_SWZ(element->swizzle, i); 442 if (swiz == last_swiz) 443 break; 444 last_swiz = swiz; 445 446 c->prog_data.param_convert[c->prog_data.nr_params] = 447 PARAM_NO_CONVERT; 448 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; 449 } 450 } 451 } 452} 453 454fs_reg * 455fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 456{ 457 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 458 fs_reg wpos = *reg; 459 fs_reg neg_y = this->pixel_y; 460 neg_y.negate = true; 461 bool flip = !ir->origin_upper_left ^ c->key.render_to_fbo; 462 463 /* gl_FragCoord.x */ 464 if (ir->pixel_center_integer) { 465 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 466 } else { 467 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 468 } 469 wpos.reg_offset++; 470 471 /* gl_FragCoord.y */ 472 if (!flip && ir->pixel_center_integer) { 473 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 474 } else { 475 fs_reg pixel_y = this->pixel_y; 476 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 477 478 if (flip) { 479 pixel_y.negate = true; 480 offset += c->key.drawable_height - 1.0; 481 } 482 483 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 484 } 485 wpos.reg_offset++; 486 487 /* gl_FragCoord.z */ 488 if (intel->gen >= 6) { 489 emit(fs_inst(BRW_OPCODE_MOV, wpos, 490 fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); 491 } else { 492 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 493 interp_reg(FRAG_ATTRIB_WPOS, 2))); 494 } 495 wpos.reg_offset++; 496 497 /* gl_FragCoord.w: Already set up in emit_interpolation */ 498 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 499 500 return reg; 501} 502 503fs_reg * 504fs_visitor::emit_general_interpolation(ir_variable *ir) 505{ 506 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 507 /* Interpolation is always in floating point regs. 
*/ 508 reg->type = BRW_REGISTER_TYPE_F; 509 fs_reg attr = *reg; 510 511 unsigned int array_elements; 512 const glsl_type *type; 513 514 if (ir->type->is_array()) { 515 array_elements = ir->type->length; 516 if (array_elements == 0) { 517 this->fail = true; 518 } 519 type = ir->type->fields.array; 520 } else { 521 array_elements = 1; 522 type = ir->type; 523 } 524 525 int location = ir->location; 526 for (unsigned int i = 0; i < array_elements; i++) { 527 for (unsigned int j = 0; j < type->matrix_columns; j++) { 528 if (urb_setup[location] == -1) { 529 /* If there's no incoming setup data for this slot, don't 530 * emit interpolation for it. 531 */ 532 attr.reg_offset += type->vector_elements; 533 location++; 534 continue; 535 } 536 537 for (unsigned int c = 0; c < type->vector_elements; c++) { 538 struct brw_reg interp = interp_reg(location, c); 539 emit(fs_inst(FS_OPCODE_LINTERP, 540 attr, 541 this->delta_x, 542 this->delta_y, 543 fs_reg(interp))); 544 attr.reg_offset++; 545 } 546 547 if (intel->gen < 6) { 548 attr.reg_offset -= type->vector_elements; 549 for (unsigned int c = 0; c < type->vector_elements; c++) { 550 emit(fs_inst(BRW_OPCODE_MUL, 551 attr, 552 attr, 553 this->pixel_w)); 554 attr.reg_offset++; 555 } 556 } 557 location++; 558 } 559 } 560 561 return reg; 562} 563 564fs_reg * 565fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) 566{ 567 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 568 569 /* The frontfacing comes in as a bit in the thread payload. 
*/ 570 if (intel->gen >= 6) { 571 emit(fs_inst(BRW_OPCODE_ASR, 572 *reg, 573 fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), 574 fs_reg(15))); 575 emit(fs_inst(BRW_OPCODE_NOT, 576 *reg, 577 *reg)); 578 emit(fs_inst(BRW_OPCODE_AND, 579 *reg, 580 *reg, 581 fs_reg(1))); 582 } else { 583 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 584 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 585 * us front face 586 */ 587 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 588 *reg, 589 fs_reg(r1_6ud), 590 fs_reg(1u << 31))); 591 inst->conditional_mod = BRW_CONDITIONAL_L; 592 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 593 } 594 595 return reg; 596} 597 598fs_inst * 599fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) 600{ 601 switch (opcode) { 602 case FS_OPCODE_RCP: 603 case FS_OPCODE_RSQ: 604 case FS_OPCODE_SQRT: 605 case FS_OPCODE_EXP2: 606 case FS_OPCODE_LOG2: 607 case FS_OPCODE_SIN: 608 case FS_OPCODE_COS: 609 break; 610 default: 611 assert(!"not reached: bad math opcode"); 612 return NULL; 613 } 614 615 /* Can't do hstride == 0 args to gen6 math, so expand it out. We 616 * might be able to do better by doing execsize = 1 math and then 617 * expanding that result out, but we would need to be careful with 618 * masking. 619 * 620 * The hardware ignores source modifiers (negate and abs) on math 621 * instructions, so we also move to a temp to set those up. 
622 */ 623 if (intel->gen >= 6 && (src.file == UNIFORM || 624 src.abs || 625 src.negate)) { 626 fs_reg expanded = fs_reg(this, glsl_type::float_type); 627 emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); 628 src = expanded; 629 } 630 631 fs_inst *inst = emit(fs_inst(opcode, dst, src)); 632 633 if (intel->gen < 6) { 634 inst->base_mrf = 2; 635 inst->mlen = 1; 636 } 637 638 return inst; 639} 640 641fs_inst * 642fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) 643{ 644 int base_mrf = 2; 645 fs_inst *inst; 646 647 assert(opcode == FS_OPCODE_POW); 648 649 if (intel->gen >= 6) { 650 /* Can't do hstride == 0 args to gen6 math, so expand it out. */ 651 if (src0.file == UNIFORM) { 652 fs_reg expanded = fs_reg(this, glsl_type::float_type); 653 emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); 654 src0 = expanded; 655 } 656 657 if (src1.file == UNIFORM) { 658 fs_reg expanded = fs_reg(this, glsl_type::float_type); 659 emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); 660 src1 = expanded; 661 } 662 663 inst = emit(fs_inst(opcode, dst, src0, src1)); 664 } else { 665 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1)); 666 inst = emit(fs_inst(opcode, dst, src0, reg_null_f)); 667 668 inst->base_mrf = base_mrf; 669 inst->mlen = 2; 670 } 671 return inst; 672} 673 674void 675fs_visitor::visit(ir_variable *ir) 676{ 677 fs_reg *reg = NULL; 678 679 if (variable_storage(ir)) 680 return; 681 682 if (strcmp(ir->name, "gl_FragColor") == 0) { 683 this->frag_color = ir; 684 } else if (strcmp(ir->name, "gl_FragData") == 0) { 685 this->frag_data = ir; 686 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 687 this->frag_depth = ir; 688 } 689 690 if (ir->mode == ir_var_in) { 691 if (!strcmp(ir->name, "gl_FragCoord")) { 692 reg = emit_fragcoord_interpolation(ir); 693 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 694 reg = emit_frontfacing_interpolation(ir); 695 } else { 696 reg = emit_general_interpolation(ir); 697 } 698 assert(reg); 699 
hash_table_insert(this->variable_ht, reg, ir); 700 return; 701 } 702 703 if (ir->mode == ir_var_uniform) { 704 int param_index = c->prog_data.nr_params; 705 706 if (!strncmp(ir->name, "gl_", 3)) { 707 setup_builtin_uniform_values(ir); 708 } else { 709 setup_uniform_values(ir->location, ir->type); 710 } 711 712 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 713 reg->type = brw_type_for_base_type(ir->type); 714 } 715 716 if (!reg) 717 reg = new(this->mem_ctx) fs_reg(this, ir->type); 718 719 hash_table_insert(this->variable_ht, reg, ir); 720} 721 722void 723fs_visitor::visit(ir_dereference_variable *ir) 724{ 725 fs_reg *reg = variable_storage(ir->var); 726 this->result = *reg; 727} 728 729void 730fs_visitor::visit(ir_dereference_record *ir) 731{ 732 const glsl_type *struct_type = ir->record->type; 733 734 ir->record->accept(this); 735 736 unsigned int offset = 0; 737 for (unsigned int i = 0; i < struct_type->length; i++) { 738 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 739 break; 740 offset += type_size(struct_type->fields.structure[i].type); 741 } 742 this->result.reg_offset += offset; 743 this->result.type = brw_type_for_base_type(ir->type); 744} 745 746void 747fs_visitor::visit(ir_dereference_array *ir) 748{ 749 ir_constant *index; 750 int element_size; 751 752 ir->array->accept(this); 753 index = ir->array_index->as_constant(); 754 755 element_size = type_size(ir->type); 756 this->result.type = brw_type_for_base_type(ir->type); 757 758 if (index) { 759 assert(this->result.file == UNIFORM || 760 (this->result.file == GRF && 761 this->result.reg != 0)); 762 this->result.reg_offset += index->value.i[0] * element_size; 763 } else { 764 assert(!"FINISHME: non-constant array element"); 765 } 766} 767 768/* Instruction selection: Produce a MOV.sat instead of 769 * MIN(MAX(val, 0), 1) when possible. 
770 */ 771bool 772fs_visitor::try_emit_saturate(ir_expression *ir) 773{ 774 ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); 775 776 if (!sat_val) 777 return false; 778 779 sat_val->accept(this); 780 fs_reg src = this->result; 781 782 this->result = fs_reg(this, ir->type); 783 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, src)); 784 inst->saturate = true; 785 786 return true; 787} 788 789static uint32_t 790brw_conditional_for_comparison(unsigned int op) 791{ 792 switch (op) { 793 case ir_binop_less: 794 return BRW_CONDITIONAL_L; 795 case ir_binop_greater: 796 return BRW_CONDITIONAL_G; 797 case ir_binop_lequal: 798 return BRW_CONDITIONAL_LE; 799 case ir_binop_gequal: 800 return BRW_CONDITIONAL_GE; 801 case ir_binop_equal: 802 case ir_binop_all_equal: /* same as equal for scalars */ 803 return BRW_CONDITIONAL_Z; 804 case ir_binop_nequal: 805 case ir_binop_any_nequal: /* same as nequal for scalars */ 806 return BRW_CONDITIONAL_NZ; 807 default: 808 assert(!"not reached: bad operation for comparison"); 809 return BRW_CONDITIONAL_NZ; 810 } 811} 812 813void 814fs_visitor::visit(ir_expression *ir) 815{ 816 unsigned int operand; 817 fs_reg op[2], temp; 818 fs_inst *inst; 819 820 assert(ir->get_num_operands() <= 2); 821 822 if (try_emit_saturate(ir)) 823 return; 824 825 for (operand = 0; operand < ir->get_num_operands(); operand++) { 826 ir->operands[operand]->accept(this); 827 if (this->result.file == BAD_FILE) { 828 ir_print_visitor v; 829 printf("Failed to get tree for expression operand:\n"); 830 ir->operands[operand]->accept(&v); 831 this->fail = true; 832 } 833 op[operand] = this->result; 834 835 /* Matrix expression operands should have been broken down to vector 836 * operations already. 837 */ 838 assert(!ir->operands[operand]->type->is_matrix()); 839 /* And then those vector operands should have been broken down to scalar. 840 */ 841 assert(!ir->operands[operand]->type->is_vector()); 842 } 843 844 /* Storage for our result. 
If our result goes into an assignment, it will 845 * just get copy-propagated out, so no worries. 846 */ 847 this->result = fs_reg(this, ir->type); 848 849 switch (ir->operation) { 850 case ir_unop_logic_not: 851 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 852 * ones complement of the whole register, not just bit 0. 853 */ 854 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1))); 855 break; 856 case ir_unop_neg: 857 op[0].negate = !op[0].negate; 858 this->result = op[0]; 859 break; 860 case ir_unop_abs: 861 op[0].abs = true; 862 this->result = op[0]; 863 break; 864 case ir_unop_sign: 865 temp = fs_reg(this, ir->type); 866 867 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 868 869 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 870 inst->conditional_mod = BRW_CONDITIONAL_G; 871 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 872 inst->predicated = true; 873 874 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 875 inst->conditional_mod = BRW_CONDITIONAL_L; 876 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 877 inst->predicated = true; 878 879 break; 880 case ir_unop_rcp: 881 emit_math(FS_OPCODE_RCP, this->result, op[0]); 882 break; 883 884 case ir_unop_exp2: 885 emit_math(FS_OPCODE_EXP2, this->result, op[0]); 886 break; 887 case ir_unop_log2: 888 emit_math(FS_OPCODE_LOG2, this->result, op[0]); 889 break; 890 case ir_unop_exp: 891 case ir_unop_log: 892 assert(!"not reached: should be handled by ir_explog_to_explog2"); 893 break; 894 case ir_unop_sin: 895 case ir_unop_sin_reduced: 896 emit_math(FS_OPCODE_SIN, this->result, op[0]); 897 break; 898 case ir_unop_cos: 899 case ir_unop_cos_reduced: 900 emit_math(FS_OPCODE_COS, this->result, op[0]); 901 break; 902 903 case ir_unop_dFdx: 904 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 905 break; 906 case ir_unop_dFdy: 907 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 908 break; 909 910 case 
ir_binop_add: 911 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 912 break; 913 case ir_binop_sub: 914 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 915 break; 916 917 case ir_binop_mul: 918 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 919 break; 920 case ir_binop_div: 921 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 922 break; 923 case ir_binop_mod: 924 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 925 break; 926 927 case ir_binop_less: 928 case ir_binop_greater: 929 case ir_binop_lequal: 930 case ir_binop_gequal: 931 case ir_binop_equal: 932 case ir_binop_all_equal: 933 case ir_binop_nequal: 934 case ir_binop_any_nequal: 935 temp = this->result; 936 /* original gen4 does implicit conversion before comparison. */ 937 if (intel->gen < 5) 938 temp.type = op[0].type; 939 940 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1])); 941 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 942 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 943 break; 944 945 case ir_binop_logic_xor: 946 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 947 break; 948 949 case ir_binop_logic_or: 950 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 951 break; 952 953 case ir_binop_logic_and: 954 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 955 break; 956 957 case ir_binop_dot: 958 case ir_unop_any: 959 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 960 break; 961 962 case ir_unop_noise: 963 assert(!"not reached: should be handled by lower_noise"); 964 break; 965 966 case ir_quadop_vector: 967 assert(!"not reached: should be handled by lower_quadop_vector"); 968 break; 969 970 case ir_unop_sqrt: 971 emit_math(FS_OPCODE_SQRT, this->result, op[0]); 972 break; 973 974 case ir_unop_rsq: 975 emit_math(FS_OPCODE_RSQ, this->result, op[0]); 976 break; 977 978 case ir_unop_i2f: 979 case ir_unop_b2f: 980 case 
ir_unop_b2i: 981 case ir_unop_f2i: 982 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 983 break; 984 case ir_unop_f2b: 985 case ir_unop_i2b: 986 temp = this->result; 987 /* original gen4 does implicit conversion before comparison. */ 988 if (intel->gen < 5) 989 temp.type = op[0].type; 990 991 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f))); 992 inst->conditional_mod = BRW_CONDITIONAL_NZ; 993 inst = emit(fs_inst(BRW_OPCODE_AND, this->result, 994 this->result, fs_reg(1))); 995 break; 996 997 case ir_unop_trunc: 998 emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0])); 999 break; 1000 case ir_unop_ceil: 1001 op[0].negate = !op[0].negate; 1002 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1003 this->result.negate = true; 1004 break; 1005 case ir_unop_floor: 1006 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1007 break; 1008 case ir_unop_fract: 1009 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 1010 break; 1011 case ir_unop_round_even: 1012 emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0])); 1013 break; 1014 1015 case ir_binop_min: 1016 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1017 inst->conditional_mod = BRW_CONDITIONAL_L; 1018 1019 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1020 inst->predicated = true; 1021 break; 1022 case ir_binop_max: 1023 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1024 inst->conditional_mod = BRW_CONDITIONAL_G; 1025 1026 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1027 inst->predicated = true; 1028 break; 1029 1030 case ir_binop_pow: 1031 emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); 1032 break; 1033 1034 case ir_unop_bit_not: 1035 inst = emit(fs_inst(BRW_OPCODE_NOT, this->result, op[0])); 1036 break; 1037 case ir_binop_bit_and: 1038 inst = emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 1039 break; 1040 case ir_binop_bit_xor: 1041 inst = emit(fs_inst(BRW_OPCODE_XOR, 
this->result, op[0], op[1])); 1042 break; 1043 case ir_binop_bit_or: 1044 inst = emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 1045 break; 1046 1047 case ir_unop_u2f: 1048 case ir_binop_lshift: 1049 case ir_binop_rshift: 1050 assert(!"GLSL 1.30 features unsupported"); 1051 break; 1052 } 1053} 1054 1055void 1056fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 1057 const glsl_type *type, bool predicated) 1058{ 1059 switch (type->base_type) { 1060 case GLSL_TYPE_FLOAT: 1061 case GLSL_TYPE_UINT: 1062 case GLSL_TYPE_INT: 1063 case GLSL_TYPE_BOOL: 1064 for (unsigned int i = 0; i < type->components(); i++) { 1065 l.type = brw_type_for_base_type(type); 1066 r.type = brw_type_for_base_type(type); 1067 1068 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1069 inst->predicated = predicated; 1070 1071 l.reg_offset++; 1072 r.reg_offset++; 1073 } 1074 break; 1075 case GLSL_TYPE_ARRAY: 1076 for (unsigned int i = 0; i < type->length; i++) { 1077 emit_assignment_writes(l, r, type->fields.array, predicated); 1078 } 1079 break; 1080 1081 case GLSL_TYPE_STRUCT: 1082 for (unsigned int i = 0; i < type->length; i++) { 1083 emit_assignment_writes(l, r, type->fields.structure[i].type, 1084 predicated); 1085 } 1086 break; 1087 1088 case GLSL_TYPE_SAMPLER: 1089 break; 1090 1091 default: 1092 assert(!"not reached"); 1093 break; 1094 } 1095} 1096 1097void 1098fs_visitor::visit(ir_assignment *ir) 1099{ 1100 struct fs_reg l, r; 1101 fs_inst *inst; 1102 1103 /* FINISHME: arrays on the lhs */ 1104 ir->lhs->accept(this); 1105 l = this->result; 1106 1107 ir->rhs->accept(this); 1108 r = this->result; 1109 1110 assert(l.file != BAD_FILE); 1111 assert(r.file != BAD_FILE); 1112 1113 if (ir->condition) { 1114 emit_bool_to_cond_code(ir->condition); 1115 } 1116 1117 if (ir->lhs->type->is_scalar() || 1118 ir->lhs->type->is_vector()) { 1119 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 1120 if (ir->write_mask & (1 << i)) { 1121 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 
1122 if (ir->condition) 1123 inst->predicated = true; 1124 r.reg_offset++; 1125 } 1126 l.reg_offset++; 1127 } 1128 } else { 1129 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 1130 } 1131} 1132 1133fs_inst * 1134fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1135{ 1136 int mlen; 1137 int base_mrf = 1; 1138 bool simd16 = false; 1139 fs_reg orig_dst; 1140 1141 /* g0 header. */ 1142 mlen = 1; 1143 1144 if (ir->shadow_comparitor) { 1145 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1146 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1147 coordinate)); 1148 coordinate.reg_offset++; 1149 } 1150 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1151 mlen += 3; 1152 1153 if (ir->op == ir_tex) { 1154 /* There's no plain shadow compare message, so we use shadow 1155 * compare with a bias of 0.0. 1156 */ 1157 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1158 fs_reg(0.0f))); 1159 mlen++; 1160 } else if (ir->op == ir_txb) { 1161 ir->lod_info.bias->accept(this); 1162 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1163 this->result)); 1164 mlen++; 1165 } else { 1166 assert(ir->op == ir_txl); 1167 ir->lod_info.lod->accept(this); 1168 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1169 this->result)); 1170 mlen++; 1171 } 1172 1173 ir->shadow_comparitor->accept(this); 1174 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1175 mlen++; 1176 } else if (ir->op == ir_tex) { 1177 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1178 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1179 coordinate)); 1180 coordinate.reg_offset++; 1181 } 1182 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1183 mlen += 3; 1184 } else { 1185 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 1186 * instructions. We'll need to do SIMD16 here. 
1187 */ 1188 assert(ir->op == ir_txb || ir->op == ir_txl); 1189 1190 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1191 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), 1192 coordinate)); 1193 coordinate.reg_offset++; 1194 } 1195 1196 /* lod/bias appears after u/v/r. */ 1197 mlen += 6; 1198 1199 if (ir->op == ir_txb) { 1200 ir->lod_info.bias->accept(this); 1201 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1202 this->result)); 1203 mlen++; 1204 } else { 1205 ir->lod_info.lod->accept(this); 1206 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1207 this->result)); 1208 mlen++; 1209 } 1210 1211 /* The unused upper half. */ 1212 mlen++; 1213 1214 /* Now, since we're doing simd16, the return is 2 interleaved 1215 * vec4s where the odd-indexed ones are junk. We'll need to move 1216 * this weirdness around to the expected layout. 1217 */ 1218 simd16 = true; 1219 orig_dst = dst; 1220 dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 1221 2)); 1222 dst.type = BRW_REGISTER_TYPE_F; 1223 } 1224 1225 fs_inst *inst = NULL; 1226 switch (ir->op) { 1227 case ir_tex: 1228 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1229 break; 1230 case ir_txb: 1231 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1232 break; 1233 case ir_txl: 1234 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1235 break; 1236 case ir_txd: 1237 case ir_txf: 1238 assert(!"GLSL 1.30 features unsupported"); 1239 break; 1240 } 1241 inst->base_mrf = base_mrf; 1242 inst->mlen = mlen; 1243 1244 if (simd16) { 1245 for (int i = 0; i < 4; i++) { 1246 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst)); 1247 orig_dst.reg_offset++; 1248 dst.reg_offset += 2; 1249 } 1250 } 1251 1252 return inst; 1253} 1254 1255fs_inst * 1256fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1257{ 1258 /* gen5's SIMD8 sampler has slots for u, v, r, array index, then 1259 * optional parameters like shadow comparitor or LOD bias. 
If 1260 * optional parameters aren't present, those base slots are 1261 * optional and don't need to be included in the message. 1262 * 1263 * We don't fill in the unnecessary slots regardless, which may 1264 * look surprising in the disassembly. 1265 */ 1266 int mlen = 1; /* g0 header always present. */ 1267 int base_mrf = 1; 1268 1269 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1270 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1271 coordinate)); 1272 coordinate.reg_offset++; 1273 } 1274 mlen += ir->coordinate->type->vector_elements; 1275 1276 if (ir->shadow_comparitor) { 1277 mlen = MAX2(mlen, 5); 1278 1279 ir->shadow_comparitor->accept(this); 1280 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1281 mlen++; 1282 } 1283 1284 fs_inst *inst = NULL; 1285 switch (ir->op) { 1286 case ir_tex: 1287 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1288 break; 1289 case ir_txb: 1290 ir->lod_info.bias->accept(this); 1291 mlen = MAX2(mlen, 5); 1292 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1293 mlen++; 1294 1295 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1296 break; 1297 case ir_txl: 1298 ir->lod_info.lod->accept(this); 1299 mlen = MAX2(mlen, 5); 1300 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1301 mlen++; 1302 1303 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1304 break; 1305 case ir_txd: 1306 case ir_txf: 1307 assert(!"GLSL 1.30 features unsupported"); 1308 break; 1309 } 1310 inst->base_mrf = base_mrf; 1311 inst->mlen = mlen; 1312 1313 return inst; 1314} 1315 1316void 1317fs_visitor::visit(ir_texture *ir) 1318{ 1319 int sampler; 1320 fs_inst *inst = NULL; 1321 1322 ir->coordinate->accept(this); 1323 fs_reg coordinate = this->result; 1324 1325 /* Should be lowered by do_lower_texture_projection */ 1326 assert(!ir->projector); 1327 1328 sampler = _mesa_get_sampler_uniform_value(ir->sampler, 1329 ctx->Shader.CurrentFragmentProgram, 1330 
&brw->fragment_program->Base); 1331 sampler = c->fp->program.Base.SamplerUnits[sampler]; 1332 1333 /* The 965 requires the EU to do the normalization of GL rectangle 1334 * texture coordinates. We use the program parameter state 1335 * tracking to get the scaling factor. 1336 */ 1337 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1338 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1339 int tokens[STATE_LENGTH] = { 1340 STATE_INTERNAL, 1341 STATE_TEXRECT_SCALE, 1342 sampler, 1343 0, 1344 0 1345 }; 1346 1347 c->prog_data.param_convert[c->prog_data.nr_params] = 1348 PARAM_NO_CONVERT; 1349 c->prog_data.param_convert[c->prog_data.nr_params + 1] = 1350 PARAM_NO_CONVERT; 1351 1352 fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1353 fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1354 GLuint index = _mesa_add_state_reference(params, 1355 (gl_state_index *)tokens); 1356 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 1357 1358 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0]; 1359 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1]; 1360 1361 fs_reg dst = fs_reg(this, ir->coordinate->type); 1362 fs_reg src = coordinate; 1363 coordinate = dst; 1364 1365 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x)); 1366 dst.reg_offset++; 1367 src.reg_offset++; 1368 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y)); 1369 } 1370 1371 /* Writemasking doesn't eliminate channels on SIMD8 texture 1372 * samples, so don't worry about them. 
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   if (intel->gen < 5) {
      inst = emit_texture_gen4(ir, dst, coordinate);
   } else {
      inst = emit_texture_gen5(ir, dst, coordinate);
   }

   inst->sampler = sampler;

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   /* Apply any texture swizzle from the compile key by MOVing the
    * selected channels (or 0.0/1.0 constants) into a fresh vec4.
    */
   if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
      fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
	 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
	 fs_reg l = swizzle_dst;
	 l.reg_offset += i;

	 if (swiz == SWIZZLE_ZERO) {
	    emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
	 } else if (swiz == SWIZZLE_ONE) {
	    emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
	 } else {
	    fs_reg r = dst;
	    r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
	    emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 }
      }
      this->result = swizzle_dst;
   }
}

/**
 * Resolve a swizzle by copying the selected source channels into a new
 * register.  A single-channel swizzle is just an offset into the source
 * value and emits no instructions.
 */
void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
	 swiz = ir->mask.x;
	 break;
      case 1:
	 swiz = ir->mask.y;
	 break;
      case 2:
	 swiz = ir->mask.z;
	 break;
      case 3:
	 swiz = ir->mask.w;
	 break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

/**
 * Emit the two-step discard sequence: DISCARD_NOT builds a mask of live
 * channels, DISCARD_AND folds it into the pixel mask.
 */
void
fs_visitor::visit(ir_discard *ir)
{
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */

emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null_d)); 1458 emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null_d, temp)); 1459 kill_emitted = true; 1460} 1461 1462void 1463fs_visitor::visit(ir_constant *ir) 1464{ 1465 /* Set this->result to reg at the bottom of the function because some code 1466 * paths will cause this visitor to be applied to other fields. This will 1467 * cause the value stored in this->result to be modified. 1468 * 1469 * Make reg constant so that it doesn't get accidentally modified along the 1470 * way. Yes, I actually had this problem. :( 1471 */ 1472 const fs_reg reg(this, ir->type); 1473 fs_reg dst_reg = reg; 1474 1475 if (ir->type->is_array()) { 1476 const unsigned size = type_size(ir->type->fields.array); 1477 1478 for (unsigned i = 0; i < ir->type->length; i++) { 1479 ir->array_elements[i]->accept(this); 1480 fs_reg src_reg = this->result; 1481 1482 dst_reg.type = src_reg.type; 1483 for (unsigned j = 0; j < size; j++) { 1484 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); 1485 src_reg.reg_offset++; 1486 dst_reg.reg_offset++; 1487 } 1488 } 1489 } else if (ir->type->is_record()) { 1490 foreach_list(node, &ir->components) { 1491 ir_instruction *const field = (ir_instruction *) node; 1492 const unsigned size = type_size(field->type); 1493 1494 field->accept(this); 1495 fs_reg src_reg = this->result; 1496 1497 dst_reg.type = src_reg.type; 1498 for (unsigned j = 0; j < size; j++) { 1499 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); 1500 src_reg.reg_offset++; 1501 dst_reg.reg_offset++; 1502 } 1503 } 1504 } else { 1505 const unsigned size = type_size(ir->type); 1506 1507 for (unsigned i = 0; i < size; i++) { 1508 switch (ir->type->base_type) { 1509 case GLSL_TYPE_FLOAT: 1510 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]))); 1511 break; 1512 case GLSL_TYPE_UINT: 1513 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]))); 1514 break; 1515 case GLSL_TYPE_INT: 1516 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, 
fs_reg(ir->value.i[i]))); 1517 break; 1518 case GLSL_TYPE_BOOL: 1519 emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]))); 1520 break; 1521 default: 1522 assert(!"Non-float/uint/int/bool constant"); 1523 } 1524 dst_reg.reg_offset++; 1525 } 1526 } 1527 1528 this->result = reg; 1529} 1530 1531void 1532fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 1533{ 1534 ir_expression *expr = ir->as_expression(); 1535 1536 if (expr) { 1537 fs_reg op[2]; 1538 fs_inst *inst; 1539 1540 assert(expr->get_num_operands() <= 2); 1541 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1542 assert(expr->operands[i]->type->is_scalar()); 1543 1544 expr->operands[i]->accept(this); 1545 op[i] = this->result; 1546 } 1547 1548 switch (expr->operation) { 1549 case ir_unop_logic_not: 1550 inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1))); 1551 inst->conditional_mod = BRW_CONDITIONAL_Z; 1552 break; 1553 1554 case ir_binop_logic_xor: 1555 inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null_d, op[0], op[1])); 1556 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1557 break; 1558 1559 case ir_binop_logic_or: 1560 inst = emit(fs_inst(BRW_OPCODE_OR, reg_null_d, op[0], op[1])); 1561 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1562 break; 1563 1564 case ir_binop_logic_and: 1565 inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], op[1])); 1566 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1567 break; 1568 1569 case ir_unop_f2b: 1570 if (intel->gen >= 6) { 1571 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, 1572 op[0], fs_reg(0.0f))); 1573 } else { 1574 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0])); 1575 } 1576 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1577 break; 1578 1579 case ir_unop_i2b: 1580 if (intel->gen >= 6) { 1581 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0))); 1582 } else { 1583 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); 1584 } 1585 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1586 break; 1587 1588 case 
ir_binop_greater: 1589 case ir_binop_gequal: 1590 case ir_binop_less: 1591 case ir_binop_lequal: 1592 case ir_binop_equal: 1593 case ir_binop_all_equal: 1594 case ir_binop_nequal: 1595 case ir_binop_any_nequal: 1596 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1])); 1597 inst->conditional_mod = 1598 brw_conditional_for_comparison(expr->operation); 1599 break; 1600 1601 default: 1602 assert(!"not reached"); 1603 this->fail = true; 1604 break; 1605 } 1606 return; 1607 } 1608 1609 ir->accept(this); 1610 1611 if (intel->gen >= 6) { 1612 fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, 1613 this->result, fs_reg(1))); 1614 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1615 } else { 1616 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, this->result)); 1617 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1618 } 1619} 1620 1621/** 1622 * Emit a gen6 IF statement with the comparison folded into the IF 1623 * instruction. 1624 */ 1625void 1626fs_visitor::emit_if_gen6(ir_if *ir) 1627{ 1628 ir_expression *expr = ir->condition->as_expression(); 1629 1630 if (expr) { 1631 fs_reg op[2]; 1632 fs_inst *inst; 1633 fs_reg temp; 1634 1635 assert(expr->get_num_operands() <= 2); 1636 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1637 assert(expr->operands[i]->type->is_scalar()); 1638 1639 expr->operands[i]->accept(this); 1640 op[i] = this->result; 1641 } 1642 1643 switch (expr->operation) { 1644 case ir_unop_logic_not: 1645 inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0))); 1646 inst->conditional_mod = BRW_CONDITIONAL_Z; 1647 return; 1648 1649 case ir_binop_logic_xor: 1650 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1651 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1652 return; 1653 1654 case ir_binop_logic_or: 1655 temp = fs_reg(this, glsl_type::bool_type); 1656 emit(fs_inst(BRW_OPCODE_OR, temp, op[0], op[1])); 1657 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); 1658 inst->conditional_mod = 
BRW_CONDITIONAL_NZ; 1659 return; 1660 1661 case ir_binop_logic_and: 1662 temp = fs_reg(this, glsl_type::bool_type); 1663 emit(fs_inst(BRW_OPCODE_AND, temp, op[0], op[1])); 1664 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); 1665 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1666 return; 1667 1668 case ir_unop_f2b: 1669 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0))); 1670 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1671 return; 1672 1673 case ir_unop_i2b: 1674 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); 1675 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1676 return; 1677 1678 case ir_binop_greater: 1679 case ir_binop_gequal: 1680 case ir_binop_less: 1681 case ir_binop_lequal: 1682 case ir_binop_equal: 1683 case ir_binop_all_equal: 1684 case ir_binop_nequal: 1685 case ir_binop_any_nequal: 1686 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1687 inst->conditional_mod = 1688 brw_conditional_for_comparison(expr->operation); 1689 return; 1690 default: 1691 assert(!"not reached"); 1692 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); 1693 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1694 this->fail = true; 1695 return; 1696 } 1697 return; 1698 } 1699 1700 ir->condition->accept(this); 1701 1702 fs_inst *inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0))); 1703 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1704} 1705 1706void 1707fs_visitor::visit(ir_if *ir) 1708{ 1709 fs_inst *inst; 1710 1711 /* Don't point the annotation at the if statement, because then it plus 1712 * the then and else blocks get printed. 
    */
   this->base_ir = ir->condition;

   if (intel->gen >= 6) {
      emit_if_gen6(ir);
   } else {
      /* Pre-gen6: evaluate the condition into the flag register, then
       * emit a predicated IF.
       */
      emit_bool_to_cond_code(ir->condition);

      inst = emit(fs_inst(BRW_OPCODE_IF));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

/**
 * Emit a DO...WHILE loop, using the counter/from/to/increment fields
 * the IR records for analyzed counted loops: initialize the counter,
 * emit a predicated BREAK on the bound, then the increment at the
 * bottom of the body.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
	 this->base_ir = ir->from;
	 ir->from->accept(this);

	 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      /* NOTE(review): assumes ir->counter is set whenever ir->to is,
       * otherwise this CMP reads reg_undef -- presumably guaranteed by
       * the loop-analysis pass; verify.
       */
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp,
				   counter, this->result));
      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   /* Signatures are only reached through ir_function above. */
   assert(!"not reached");
   (void)ir;
}

/**
 * Append a copy of \p inst to the instruction list, tagging it with the
 * current annotation and IR node for debug output.
 */
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color.
    */
   emit(fs_inst(BRW_OPCODE_MOV,
		fs_reg(MRF, 2),
		fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
		fs_reg(MRF, 3),
		fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
		fs_reg(MRF, 4),
		fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
		fs_reg(MRF, 5),
		fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
			fs_reg(0),
			fs_reg(0)));
   write->base_mrf = 0;
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   /* Two channels of setup share a register; odd channels live in the
    * second half (byte offset 4 * sizeof(float)... expressed as
    * suboffset 4 below).
    */
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* Add the immediate-vector per-pixel offsets to the subspan X/Y
    * coordinates from the g1 payload.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      /* PLN consumes delta_x/delta_y as an adjacent register pair (see
       * generate_linterp()), so allocate them together.
       */
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this,
glsl_type::float_type); 1931 } 1932 emit(fs_inst(BRW_OPCODE_ADD, 1933 this->delta_x, 1934 this->pixel_x, 1935 fs_reg(negate(brw_vec1_grf(1, 0))))); 1936 emit(fs_inst(BRW_OPCODE_ADD, 1937 this->delta_y, 1938 this->pixel_y, 1939 fs_reg(negate(brw_vec1_grf(1, 1))))); 1940 1941 this->current_annotation = "compute pos.w and 1/pos.w"; 1942 /* Compute wpos.w. It's always in our setup, since it's needed to 1943 * interpolate the other attributes. 1944 */ 1945 this->wpos_w = fs_reg(this, glsl_type::float_type); 1946 emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, 1947 interp_reg(FRAG_ATTRIB_WPOS, 3))); 1948 /* Compute the pixel 1/W value from wpos.w. */ 1949 this->pixel_w = fs_reg(this, glsl_type::float_type); 1950 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 1951 this->current_annotation = NULL; 1952} 1953 1954/** Emits the interpolation for the varying inputs. */ 1955void 1956fs_visitor::emit_interpolation_setup_gen6() 1957{ 1958 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1959 1960 /* If the pixel centers end up used, the setup is the same as for gen4. */ 1961 this->current_annotation = "compute pixel centers"; 1962 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); 1963 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); 1964 int_pixel_x.type = BRW_REGISTER_TYPE_UW; 1965 int_pixel_y.type = BRW_REGISTER_TYPE_UW; 1966 emit(fs_inst(BRW_OPCODE_ADD, 1967 int_pixel_x, 1968 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1969 fs_reg(brw_imm_v(0x10101010)))); 1970 emit(fs_inst(BRW_OPCODE_ADD, 1971 int_pixel_y, 1972 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1973 fs_reg(brw_imm_v(0x11001100)))); 1974 1975 /* As of gen6, we can no longer mix float and int sources. We have 1976 * to turn the integer pixel centers into floats for their actual 1977 * use. 
1978 */ 1979 this->pixel_x = fs_reg(this, glsl_type::float_type); 1980 this->pixel_y = fs_reg(this, glsl_type::float_type); 1981 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x)); 1982 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); 1983 1984 this->current_annotation = "compute 1/pos.w"; 1985 this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); 1986 this->pixel_w = fs_reg(this, glsl_type::float_type); 1987 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 1988 1989 this->delta_x = fs_reg(brw_vec8_grf(2, 0)); 1990 this->delta_y = fs_reg(brw_vec8_grf(3, 0)); 1991 1992 this->current_annotation = NULL; 1993} 1994 1995void 1996fs_visitor::emit_fb_writes() 1997{ 1998 this->current_annotation = "FB write header"; 1999 GLboolean header_present = GL_TRUE; 2000 int nr = 0; 2001 2002 if (intel->gen >= 6 && 2003 !this->kill_emitted && 2004 c->key.nr_color_regions == 1) { 2005 header_present = false; 2006 } 2007 2008 if (header_present) { 2009 /* m0, m1 header */ 2010 nr += 2; 2011 } 2012 2013 if (c->aa_dest_stencil_reg) { 2014 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2015 fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); 2016 } 2017 2018 /* Reserve space for color. It'll be filled in per MRT below. */ 2019 int color_mrf = nr; 2020 nr += 4; 2021 2022 if (c->source_depth_to_render_target) { 2023 if (c->computes_depth) { 2024 /* Hand over gl_FragDepth. */ 2025 assert(this->frag_depth); 2026 fs_reg depth = *(variable_storage(this->frag_depth)); 2027 2028 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 2029 } else { 2030 /* Pass through the payload depth. 
*/ 2031 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2032 fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); 2033 } 2034 } 2035 2036 if (c->dest_depth_reg) { 2037 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2038 fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); 2039 } 2040 2041 fs_reg color = reg_undef; 2042 if (this->frag_color) 2043 color = *(variable_storage(this->frag_color)); 2044 else if (this->frag_data) { 2045 color = *(variable_storage(this->frag_data)); 2046 color.type = BRW_REGISTER_TYPE_F; 2047 } 2048 2049 for (int target = 0; target < c->key.nr_color_regions; target++) { 2050 this->current_annotation = talloc_asprintf(this->mem_ctx, 2051 "FB write target %d", 2052 target); 2053 if (this->frag_color || this->frag_data) { 2054 for (int i = 0; i < 4; i++) { 2055 emit(fs_inst(BRW_OPCODE_MOV, 2056 fs_reg(MRF, color_mrf + i), 2057 color)); 2058 color.reg_offset++; 2059 } 2060 } 2061 2062 if (this->frag_color) 2063 color.reg_offset -= 4; 2064 2065 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 2066 reg_undef, reg_undef)); 2067 inst->target = target; 2068 inst->base_mrf = 0; 2069 inst->mlen = nr; 2070 if (target == c->key.nr_color_regions - 1) 2071 inst->eot = true; 2072 inst->header_present = header_present; 2073 } 2074 2075 if (c->key.nr_color_regions == 0) { 2076 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 2077 reg_undef, reg_undef)); 2078 inst->base_mrf = 0; 2079 inst->mlen = nr; 2080 inst->eot = true; 2081 inst->header_present = header_present; 2082 } 2083 2084 this->current_annotation = NULL; 2085} 2086 2087void 2088fs_visitor::generate_fb_write(fs_inst *inst) 2089{ 2090 GLboolean eot = inst->eot; 2091 struct brw_reg implied_header; 2092 2093 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 2094 * move, here's g1. 
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
	 brw_MOV(p,
		 brw_message_reg(inst->base_mrf),
		 brw_vec8_grf(0, 0));

	 if (inst->target > 0) {
	    /* Set the render target index for choosing BLEND_STATE. */
	    brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
			      BRW_REGISTER_TYPE_UD),
		    brw_imm_ud(inst->target));
	 }

	 /* Clear viewport index, render target array index. */
	 brw_AND(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 0),
			   BRW_REGISTER_TYPE_UD),
		 retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
		 brw_imm_ud(0xf7ff));

	 implied_header = brw_null_reg();
      } else {
	 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }

      brw_MOV(p,
	      brw_message_reg(inst->base_mrf + 1),
	      brw_vec8_grf(1, 0));
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
		8, /* dispatch_width */
		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
		inst->base_mrf,
		implied_header,
		inst->target,
		inst->mlen,
		0,
		eot,
		inst->header_present);
}

/**
 * Generate a varying interpolation instruction: PLN when the hardware
 * has it and delta_x/delta_y form a usable register pair, otherwise
 * the LINE+MAC fallback.
 */
void
fs_visitor::generate_linterp(fs_inst *inst,
			     struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      /* Pre-gen6 PLN additionally needs the pair to start on an even
       * register number.
       */
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

/**
 * Translate an FS_OPCODE_* math opcode into a send to the shared math
 * unit (pre-gen6) or a native math instruction (gen6+).
 */
void
fs_visitor::generate_math(fs_inst *inst,
			  struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode)
{ 2170 case FS_OPCODE_RCP: 2171 op = BRW_MATH_FUNCTION_INV; 2172 break; 2173 case FS_OPCODE_RSQ: 2174 op = BRW_MATH_FUNCTION_RSQ; 2175 break; 2176 case FS_OPCODE_SQRT: 2177 op = BRW_MATH_FUNCTION_SQRT; 2178 break; 2179 case FS_OPCODE_EXP2: 2180 op = BRW_MATH_FUNCTION_EXP; 2181 break; 2182 case FS_OPCODE_LOG2: 2183 op = BRW_MATH_FUNCTION_LOG; 2184 break; 2185 case FS_OPCODE_POW: 2186 op = BRW_MATH_FUNCTION_POW; 2187 break; 2188 case FS_OPCODE_SIN: 2189 op = BRW_MATH_FUNCTION_SIN; 2190 break; 2191 case FS_OPCODE_COS: 2192 op = BRW_MATH_FUNCTION_COS; 2193 break; 2194 default: 2195 assert(!"not reached: unknown math function"); 2196 op = 0; 2197 break; 2198 } 2199 2200 if (intel->gen >= 6) { 2201 assert(inst->mlen == 0); 2202 2203 if (inst->opcode == FS_OPCODE_POW) { 2204 brw_math2(p, dst, op, src[0], src[1]); 2205 } else { 2206 brw_math(p, dst, 2207 op, 2208 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 2209 BRW_MATH_SATURATE_NONE, 2210 0, src[0], 2211 BRW_MATH_DATA_VECTOR, 2212 BRW_MATH_PRECISION_FULL); 2213 } 2214 } else { 2215 assert(inst->mlen >= 1); 2216 2217 brw_math(p, dst, 2218 op, 2219 inst->saturate ? 
BRW_MATH_SATURATE_SATURATE : 2220 BRW_MATH_SATURATE_NONE, 2221 inst->base_mrf, src[0], 2222 BRW_MATH_DATA_VECTOR, 2223 BRW_MATH_PRECISION_FULL); 2224 } 2225} 2226 2227void 2228fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) 2229{ 2230 int msg_type = -1; 2231 int rlen = 4; 2232 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; 2233 2234 if (intel->gen >= 5) { 2235 switch (inst->opcode) { 2236 case FS_OPCODE_TEX: 2237 if (inst->shadow_compare) { 2238 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; 2239 } else { 2240 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; 2241 } 2242 break; 2243 case FS_OPCODE_TXB: 2244 if (inst->shadow_compare) { 2245 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; 2246 } else { 2247 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; 2248 } 2249 break; 2250 } 2251 } else { 2252 switch (inst->opcode) { 2253 case FS_OPCODE_TEX: 2254 /* Note that G45 and older determines shadow compare and dispatch width 2255 * from message length for most messages. 
2256 */ 2257 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 2258 if (inst->shadow_compare) { 2259 assert(inst->mlen == 6); 2260 } else { 2261 assert(inst->mlen <= 4); 2262 } 2263 break; 2264 case FS_OPCODE_TXB: 2265 if (inst->shadow_compare) { 2266 assert(inst->mlen == 6); 2267 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 2268 } else { 2269 assert(inst->mlen == 9); 2270 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 2271 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; 2272 } 2273 break; 2274 } 2275 } 2276 assert(msg_type != -1); 2277 2278 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { 2279 rlen = 8; 2280 dst = vec16(dst); 2281 } 2282 2283 brw_SAMPLE(p, 2284 retype(dst, BRW_REGISTER_TYPE_UW), 2285 inst->base_mrf, 2286 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 2287 SURF_INDEX_TEXTURE(inst->sampler), 2288 inst->sampler, 2289 WRITEMASK_XYZW, 2290 msg_type, 2291 rlen, 2292 inst->mlen, 2293 0, 2294 1, 2295 simd_mode); 2296} 2297 2298 2299/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 2300 * looking like: 2301 * 2302 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 2303 * 2304 * and we're trying to produce: 2305 * 2306 * DDX DDY 2307 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 2308 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 2309 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 2310 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 2311 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 2312 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 2313 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 2314 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 2315 * 2316 * and add another set of two more subspans if in 16-pixel dispatch mode. 2317 * 2318 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 2319 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 2320 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 2321 * between each other. 
We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Per the layout comment above: width 2, vstride 2, hstride 0
    * replicates each subspan's tr (suboffset 1) and tl (suboffset 0)
    * across the pair.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* The simplified form described above: tl (suboffset 0) minus
    * bl (suboffset 2), replicated across each subspan.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/**
 * First half of discard: produce per-channel "still alive" bits in
 * \p mask for generate_discard_and() to fold into the pixel enables.
 */
void
fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
{
   if (intel->gen >= 6) {
      /* Gen6 no longer has the mask reg for us to just read the
       * active channels from.  However, cmp updates just the channels
       * of the flag reg that are enabled, so we can get at the
       * channel enables that way.  In this step, make a reg of ones
       * we'll compare to.
2370 */ 2371 brw_MOV(p, mask, brw_imm_ud(1)); 2372 } else { 2373 brw_push_insn_state(p); 2374 brw_set_mask_control(p, BRW_MASK_DISABLE); 2375 brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */ 2376 brw_pop_insn_state(p); 2377 } 2378} 2379 2380void 2381fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask) 2382{ 2383 if (intel->gen >= 6) { 2384 struct brw_reg f0 = brw_flag_reg(); 2385 struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); 2386 2387 brw_push_insn_state(p); 2388 brw_set_mask_control(p, BRW_MASK_DISABLE); 2389 brw_MOV(p, f0, brw_imm_uw(0xffff)); /* inactive channels undiscarded */ 2390 brw_pop_insn_state(p); 2391 2392 brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 2393 BRW_CONDITIONAL_Z, mask, brw_imm_ud(0)); /* active channels fail test */ 2394 /* Undo CMP's whacking of predication*/ 2395 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2396 2397 brw_push_insn_state(p); 2398 brw_set_mask_control(p, BRW_MASK_DISABLE); 2399 brw_AND(p, g1, f0, g1); 2400 brw_pop_insn_state(p); 2401 } else { 2402 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); 2403 2404 mask = brw_uw1_reg(mask.file, mask.nr, 0); 2405 2406 brw_push_insn_state(p); 2407 brw_set_mask_control(p, BRW_MASK_DISABLE); 2408 brw_AND(p, g0, mask, g0); 2409 brw_pop_insn_state(p); 2410 } 2411} 2412 2413void 2414fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src) 2415{ 2416 assert(inst->mlen != 0); 2417 2418 brw_MOV(p, 2419 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD), 2420 retype(src, BRW_REGISTER_TYPE_UD)); 2421 brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1, 2422 inst->offset); 2423} 2424 2425void 2426fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst) 2427{ 2428 assert(inst->mlen != 0); 2429 2430 /* Clear any post destination dependencies that would be ignored by 2431 * the block read. See the B-Spec for pre-gen5 send instruction. 
2432 * 2433 * This could use a better solution, since texture sampling and 2434 * math reads could potentially run into it as well -- anywhere 2435 * that we have a SEND with a destination that is a register that 2436 * was written but not read within the last N instructions (what's 2437 * N? unsure). This is rare because of dead code elimination, but 2438 * not impossible. 2439 */ 2440 if (intel->gen == 4 && !intel->is_g4x) 2441 brw_MOV(p, brw_null_reg(), dst); 2442 2443 brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1, 2444 inst->offset); 2445 2446 if (intel->gen == 4 && !intel->is_g4x) { 2447 /* gen4 errata: destination from a send can't be used as a 2448 * destination until it's been read. Just read it so we don't 2449 * have to worry. 2450 */ 2451 brw_MOV(p, brw_null_reg(), dst); 2452 } 2453} 2454 2455 2456void 2457fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) 2458{ 2459 assert(inst->mlen != 0); 2460 2461 /* Clear any post destination dependencies that would be ignored by 2462 * the block read. See the B-Spec for pre-gen5 send instruction. 2463 * 2464 * This could use a better solution, since texture sampling and 2465 * math reads could potentially run into it as well -- anywhere 2466 * that we have a SEND with a destination that is a register that 2467 * was written but not read within the last N instructions (what's 2468 * N? unsure). This is rare because of dead code elimination, but 2469 * not impossible. 2470 */ 2471 if (intel->gen == 4 && !intel->is_g4x) 2472 brw_MOV(p, brw_null_reg(), dst); 2473 2474 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), 2475 inst->offset, SURF_INDEX_FRAG_CONST_BUFFER); 2476 2477 if (intel->gen == 4 && !intel->is_g4x) { 2478 /* gen4 errata: destination from a send can't be used as a 2479 * destination until it's been read. Just read it so we don't 2480 * have to worry. 
2481 */ 2482 brw_MOV(p, brw_null_reg(), dst); 2483 } 2484} 2485 2486void 2487fs_visitor::assign_curb_setup() 2488{ 2489 c->prog_data.first_curbe_grf = c->nr_payload_regs; 2490 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 2491 2492 /* Map the offsets in the UNIFORM file to fixed HW regs. */ 2493 foreach_iter(exec_list_iterator, iter, this->instructions) { 2494 fs_inst *inst = (fs_inst *)iter.get(); 2495 2496 for (unsigned int i = 0; i < 3; i++) { 2497 if (inst->src[i].file == UNIFORM) { 2498 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 2499 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + 2500 constant_nr / 8, 2501 constant_nr % 8); 2502 2503 inst->src[i].file = FIXED_HW_REG; 2504 inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type); 2505 } 2506 } 2507 } 2508} 2509 2510void 2511fs_visitor::calculate_urb_setup() 2512{ 2513 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2514 urb_setup[i] = -1; 2515 } 2516 2517 int urb_next = 0; 2518 /* Figure out where each of the incoming setup attributes lands. */ 2519 if (intel->gen >= 6) { 2520 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2521 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) { 2522 urb_setup[i] = urb_next++; 2523 } 2524 } 2525 } else { 2526 /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */ 2527 for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) { 2528 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { 2529 int fp_index; 2530 2531 if (i >= VERT_RESULT_VAR0) 2532 fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); 2533 else if (i <= VERT_RESULT_TEX7) 2534 fp_index = i; 2535 else 2536 fp_index = -1; 2537 2538 if (fp_index >= 0) 2539 urb_setup[fp_index] = urb_next++; 2540 } 2541 } 2542 } 2543 2544 /* Each attribute is 4 setup channels, each of which is half a reg. 
*/ 2545 c->prog_data.urb_read_length = urb_next * 2; 2546} 2547 2548void 2549fs_visitor::assign_urb_setup() 2550{ 2551 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; 2552 2553 /* Offset all the urb_setup[] index by the actual position of the 2554 * setup regs, now that the location of the constants has been chosen. 2555 */ 2556 foreach_iter(exec_list_iterator, iter, this->instructions) { 2557 fs_inst *inst = (fs_inst *)iter.get(); 2558 2559 if (inst->opcode != FS_OPCODE_LINTERP) 2560 continue; 2561 2562 assert(inst->src[2].file == FIXED_HW_REG); 2563 2564 inst->src[2].fixed_hw_reg.nr += urb_start; 2565 } 2566 2567 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 2568} 2569 2570/** 2571 * Split large virtual GRFs into separate components if we can. 2572 * 2573 * This is mostly duplicated with what brw_fs_vector_splitting does, 2574 * but that's really conservative because it's afraid of doing 2575 * splitting that doesn't result in real progress after the rest of 2576 * the optimization phases, which would cause infinite looping in 2577 * optimization. We can do it once here, safely. This also has the 2578 * opportunity to split interpolated values, or maybe even uniforms, 2579 * which we don't have at the IR level. 2580 * 2581 * We want to split, because virtual GRFs are what we register 2582 * allocate and spill (due to contiguousness requirements for some 2583 * instructions), and they're what we naturally generate in the 2584 * codegen process, but most virtual GRFs don't actually need to be 2585 * contiguous sets of GRFs. If we split, we'll end up with reduced 2586 * live intervals and better dead code elimination and coalescing. 2587 */ 2588void 2589fs_visitor::split_virtual_grfs() 2590{ 2591 int num_vars = this->virtual_grf_next; 2592 bool split_grf[num_vars]; 2593 int new_virtual_grf[num_vars]; 2594 2595 /* Try to split anything > 0 sized. 
*/ 2596 for (int i = 0; i < num_vars; i++) { 2597 if (this->virtual_grf_sizes[i] != 1) 2598 split_grf[i] = true; 2599 else 2600 split_grf[i] = false; 2601 } 2602 2603 if (brw->has_pln) { 2604 /* PLN opcodes rely on the delta_xy being contiguous. */ 2605 split_grf[this->delta_x.reg] = false; 2606 } 2607 2608 foreach_iter(exec_list_iterator, iter, this->instructions) { 2609 fs_inst *inst = (fs_inst *)iter.get(); 2610 2611 /* Texturing produces 4 contiguous registers, so no splitting. */ 2612 if ((inst->opcode == FS_OPCODE_TEX || 2613 inst->opcode == FS_OPCODE_TXB || 2614 inst->opcode == FS_OPCODE_TXL) && 2615 inst->dst.file == GRF) { 2616 split_grf[inst->dst.reg] = false; 2617 } 2618 } 2619 2620 /* Allocate new space for split regs. Note that the virtual 2621 * numbers will be contiguous. 2622 */ 2623 for (int i = 0; i < num_vars; i++) { 2624 if (split_grf[i]) { 2625 new_virtual_grf[i] = virtual_grf_alloc(1); 2626 for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { 2627 int reg = virtual_grf_alloc(1); 2628 assert(reg == new_virtual_grf[i] + j - 1); 2629 (void) reg; 2630 } 2631 this->virtual_grf_sizes[i] = 1; 2632 } 2633 } 2634 2635 foreach_iter(exec_list_iterator, iter, this->instructions) { 2636 fs_inst *inst = (fs_inst *)iter.get(); 2637 2638 if (inst->dst.file == GRF && 2639 split_grf[inst->dst.reg] && 2640 inst->dst.reg_offset != 0) { 2641 inst->dst.reg = (new_virtual_grf[inst->dst.reg] + 2642 inst->dst.reg_offset - 1); 2643 inst->dst.reg_offset = 0; 2644 } 2645 for (int i = 0; i < 3; i++) { 2646 if (inst->src[i].file == GRF && 2647 split_grf[inst->src[i].reg] && 2648 inst->src[i].reg_offset != 0) { 2649 inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] + 2650 inst->src[i].reg_offset - 1); 2651 inst->src[i].reg_offset = 0; 2652 } 2653 } 2654 } 2655} 2656 2657/** 2658 * Choose accesses from the UNIFORM file to demote to using the pull 2659 * constant buffer. 
2660 * 2661 * We allow a fragment shader to have more than the specified minimum 2662 * maximum number of fragment shader uniform components (64). If 2663 * there are too many of these, they'd fill up all of register space. 2664 * So, this will push some of them out to the pull constant buffer and 2665 * update the program to load them. 2666 */ 2667void 2668fs_visitor::setup_pull_constants() 2669{ 2670 /* Only allow 16 registers (128 uniform components) as push constants. */ 2671 unsigned int max_uniform_components = 16 * 8; 2672 if (c->prog_data.nr_params <= max_uniform_components) 2673 return; 2674 2675 /* Just demote the end of the list. We could probably do better 2676 * here, demoting things that are rarely used in the program first. 2677 */ 2678 int pull_uniform_base = max_uniform_components; 2679 int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; 2680 2681 foreach_iter(exec_list_iterator, iter, this->instructions) { 2682 fs_inst *inst = (fs_inst *)iter.get(); 2683 2684 for (int i = 0; i < 3; i++) { 2685 if (inst->src[i].file != UNIFORM) 2686 continue; 2687 2688 int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 2689 if (uniform_nr < pull_uniform_base) 2690 continue; 2691 2692 fs_reg dst = fs_reg(this, glsl_type::float_type); 2693 fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD, 2694 dst); 2695 pull->offset = ((uniform_nr - pull_uniform_base) * 4) & ~15; 2696 pull->ir = inst->ir; 2697 pull->annotation = inst->annotation; 2698 pull->base_mrf = 14; 2699 pull->mlen = 1; 2700 2701 inst->insert_before(pull); 2702 2703 inst->src[i].file = GRF; 2704 inst->src[i].reg = dst.reg; 2705 inst->src[i].reg_offset = 0; 2706 inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3; 2707 } 2708 } 2709 2710 for (int i = 0; i < pull_uniform_count; i++) { 2711 c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i]; 2712 c->prog_data.pull_param_convert[i] = 2713 c->prog_data.param_convert[pull_uniform_base + i]; 
2714 } 2715 c->prog_data.nr_params -= pull_uniform_count; 2716 c->prog_data.nr_pull_params = pull_uniform_count; 2717} 2718 2719void 2720fs_visitor::calculate_live_intervals() 2721{ 2722 int num_vars = this->virtual_grf_next; 2723 int *def = talloc_array(mem_ctx, int, num_vars); 2724 int *use = talloc_array(mem_ctx, int, num_vars); 2725 int loop_depth = 0; 2726 int loop_start = 0; 2727 int bb_header_ip = 0; 2728 2729 for (int i = 0; i < num_vars; i++) { 2730 def[i] = 1 << 30; 2731 use[i] = -1; 2732 } 2733 2734 int ip = 0; 2735 foreach_iter(exec_list_iterator, iter, this->instructions) { 2736 fs_inst *inst = (fs_inst *)iter.get(); 2737 2738 if (inst->opcode == BRW_OPCODE_DO) { 2739 if (loop_depth++ == 0) 2740 loop_start = ip; 2741 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2742 loop_depth--; 2743 2744 if (loop_depth == 0) { 2745 /* Patches up the use of vars marked for being live across 2746 * the whole loop. 2747 */ 2748 for (int i = 0; i < num_vars; i++) { 2749 if (use[i] == loop_start) { 2750 use[i] = ip; 2751 } 2752 } 2753 } 2754 } else { 2755 for (unsigned int i = 0; i < 3; i++) { 2756 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2757 int reg = inst->src[i].reg; 2758 2759 if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && 2760 def[reg] >= bb_header_ip)) { 2761 use[reg] = ip; 2762 } else { 2763 def[reg] = MIN2(loop_start, def[reg]); 2764 use[reg] = loop_start; 2765 2766 /* Nobody else is going to go smash our start to 2767 * later in the loop now, because def[reg] now 2768 * points before the bb header. 2769 */ 2770 } 2771 } 2772 } 2773 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2774 int reg = inst->dst.reg; 2775 2776 if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && 2777 !inst->predicated)) { 2778 def[reg] = MIN2(def[reg], ip); 2779 } else { 2780 def[reg] = MIN2(def[reg], loop_start); 2781 } 2782 } 2783 } 2784 2785 ip++; 2786 2787 /* Set the basic block header IP. 
This is used for determining 2788 * if a complete def of single-register virtual GRF in a loop 2789 * dominates a use in the same basic block. It's a quick way to 2790 * reduce the live interval range of most register used in a 2791 * loop. 2792 */ 2793 if (inst->opcode == BRW_OPCODE_IF || 2794 inst->opcode == BRW_OPCODE_ELSE || 2795 inst->opcode == BRW_OPCODE_ENDIF || 2796 inst->opcode == BRW_OPCODE_DO || 2797 inst->opcode == BRW_OPCODE_WHILE || 2798 inst->opcode == BRW_OPCODE_BREAK || 2799 inst->opcode == BRW_OPCODE_CONTINUE) { 2800 bb_header_ip = ip; 2801 } 2802 } 2803 2804 talloc_free(this->virtual_grf_def); 2805 talloc_free(this->virtual_grf_use); 2806 this->virtual_grf_def = def; 2807 this->virtual_grf_use = use; 2808} 2809 2810/** 2811 * Attempts to move immediate constants into the immediate 2812 * constant slot of following instructions. 2813 * 2814 * Immediate constants are a bit tricky -- they have to be in the last 2815 * operand slot, you can't do abs/negate on them, 2816 */ 2817 2818bool 2819fs_visitor::propagate_constants() 2820{ 2821 bool progress = false; 2822 2823 foreach_iter(exec_list_iterator, iter, this->instructions) { 2824 fs_inst *inst = (fs_inst *)iter.get(); 2825 2826 if (inst->opcode != BRW_OPCODE_MOV || 2827 inst->predicated || 2828 inst->dst.file != GRF || inst->src[0].file != IMM || 2829 inst->dst.type != inst->src[0].type) 2830 continue; 2831 2832 /* Don't bother with cases where we should have had the 2833 * operation on the constant folded in GLSL already. 2834 */ 2835 if (inst->saturate) 2836 continue; 2837 2838 /* Found a move of a constant to a GRF. Find anything else using the GRF 2839 * before it's written, and replace it with the constant if we can. 
2840 */ 2841 exec_list_iterator scan_iter = iter; 2842 scan_iter.next(); 2843 for (; scan_iter.has_next(); scan_iter.next()) { 2844 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2845 2846 if (scan_inst->opcode == BRW_OPCODE_DO || 2847 scan_inst->opcode == BRW_OPCODE_WHILE || 2848 scan_inst->opcode == BRW_OPCODE_ELSE || 2849 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2850 break; 2851 } 2852 2853 for (int i = 2; i >= 0; i--) { 2854 if (scan_inst->src[i].file != GRF || 2855 scan_inst->src[i].reg != inst->dst.reg || 2856 scan_inst->src[i].reg_offset != inst->dst.reg_offset) 2857 continue; 2858 2859 /* Don't bother with cases where we should have had the 2860 * operation on the constant folded in GLSL already. 2861 */ 2862 if (scan_inst->src[i].negate || scan_inst->src[i].abs) 2863 continue; 2864 2865 switch (scan_inst->opcode) { 2866 case BRW_OPCODE_MOV: 2867 scan_inst->src[i] = inst->src[0]; 2868 progress = true; 2869 break; 2870 2871 case BRW_OPCODE_MUL: 2872 case BRW_OPCODE_ADD: 2873 if (i == 1) { 2874 scan_inst->src[i] = inst->src[0]; 2875 progress = true; 2876 } else if (i == 0 && scan_inst->src[1].file != IMM) { 2877 /* Fit this constant in by commuting the operands */ 2878 scan_inst->src[0] = scan_inst->src[1]; 2879 scan_inst->src[1] = inst->src[0]; 2880 } 2881 break; 2882 case BRW_OPCODE_CMP: 2883 case BRW_OPCODE_SEL: 2884 if (i == 1) { 2885 scan_inst->src[i] = inst->src[0]; 2886 progress = true; 2887 } 2888 } 2889 } 2890 2891 if (scan_inst->dst.file == GRF && 2892 scan_inst->dst.reg == inst->dst.reg && 2893 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2894 scan_inst->opcode == FS_OPCODE_TEX)) { 2895 break; 2896 } 2897 } 2898 } 2899 2900 return progress; 2901} 2902/** 2903 * Must be called after calculate_live_intervales() to remove unused 2904 * writes to registers -- register allocation will fail otherwise 2905 * because something deffed but not used won't be considered to 2906 * interfere with other regs. 
 */
bool
fs_visitor::dead_code_eliminate()
{
   bool progress = false;
   int pc = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      /* If the last use of the destination GRF is at or before this
       * instruction, nothing ever reads the result: drop the write.
       */
      if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
	 inst->remove();
	 progress = true;
      }

      pc++;
   }

   return progress;
}

/* Remove MOVs between GRFs by rewriting later readers of the MOV's
 * destination to read the MOV's source directly, when neither register
 * is written again before the last use.
 */
bool
fs_visitor::register_coalesce()
{
   bool progress = false;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != BRW_OPCODE_MOV ||
	  inst->predicated ||
	  inst->saturate ||
	  inst->dst.file != GRF || inst->src[0].file != GRF ||
	  inst->dst.type != inst->src[0].type)
	 continue;

      /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
       * them: check for no writes to either one until the exit of the
       * program.
       */
      bool interfered = false;
      exec_list_iterator scan_iter = iter;
      scan_iter.next();
      for (; scan_iter.has_next(); scan_iter.next()) {
	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();

	 /* Control flow ends the straight-line scan; resuming the outer
	  * walk at this point skips rescanning the region we gave up on.
	  */
	 if (scan_inst->opcode == BRW_OPCODE_DO ||
	     scan_inst->opcode == BRW_OPCODE_WHILE ||
	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
	    interfered = true;
	    iter = scan_iter;
	    break;
	 }

	 if (scan_inst->dst.file == GRF) {
	    /* The FS_OPCODE_TEX checks treat a texture write as covering
	     * every reg_offset, since it writes several contiguous regs.
	     */
	    if (scan_inst->dst.reg == inst->dst.reg &&
		(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
		 scan_inst->opcode == FS_OPCODE_TEX)) {
	       interfered = true;
	       break;
	    }
	    if (scan_inst->dst.reg == inst->src[0].reg &&
		(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
		 scan_inst->opcode == FS_OPCODE_TEX)) {
	       interfered = true;
	       break;
	    }
	 }
      }
      if (interfered) {
	 continue;
      }

      /* Update live interval so we don't have to recalculate.
       */
      this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg],
						     virtual_grf_use[inst->dst.reg]);

      /* Rewrite the later usage to point at the source of the move to
       * be removed.
       */
      for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
	   scan_iter.next()) {
	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();

	 for (int i = 0; i < 3; i++) {
	    if (scan_inst->src[i].file == GRF &&
		scan_inst->src[i].reg == inst->dst.reg &&
		scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
	       scan_inst->src[i].reg = inst->src[0].reg;
	       scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
	       scan_inst->src[i].abs |= inst->src[0].abs;
	       scan_inst->src[i].negate ^= inst->src[0].negate;
	       scan_inst->src[i].smear = inst->src[0].smear;
	    }
	 }
      }

      inst->remove();
      progress = true;
   }

   return progress;
}


/* For a MOV of a GRF into an MRF, try to make the instruction that
 * produced the GRF value write the MRF directly, eliminating the MOV.
 */
bool
fs_visitor::compute_to_mrf()
{
   bool progress = false;
   int next_ip = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      int ip = next_ip;
      next_ip++;

      if (inst->opcode != BRW_OPCODE_MOV ||
	  inst->predicated ||
	  inst->dst.file != MRF || inst->src[0].file != GRF ||
	  inst->dst.type != inst->src[0].type ||
	  inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1)
	 continue;

      /* Can't compute-to-MRF this GRF if someone else was going to
       * read it later.
       */
      if (this->virtual_grf_use[inst->src[0].reg] > ip)
	 continue;

      /* Found a move of a GRF to a MRF.  Let's see if we can go
       * rewrite the thing that made this GRF to write into the MRF.
       */
      fs_inst *scan_inst;
      /* Walk backwards from the MOV toward the head of the list. */
      for (scan_inst = (fs_inst *)inst->prev;
	   scan_inst->prev != NULL;
	   scan_inst = (fs_inst *)scan_inst->prev) {
	 if (scan_inst->dst.file == GRF &&
	     scan_inst->dst.reg == inst->src[0].reg) {
	    /* Found the last thing to write our reg we want to turn
	     * into a compute-to-MRF.
	     */

	    if (scan_inst->opcode == FS_OPCODE_TEX) {
	       /* texturing writes several continuous regs, so we can't
		* compute-to-mrf that.
		*/
	       break;
	    }

	    /* If it's predicated, it (probably) didn't populate all
	     * the channels.
	     */
	    if (scan_inst->predicated)
	       break;

	    /* SEND instructions can't have MRF as a destination. */
	    if (scan_inst->mlen)
	       break;

	    if (intel->gen >= 6) {
	       /* gen6 math instructions must have the destination be
		* GRF, so no compute-to-MRF for them.
		*/
	       if (scan_inst->opcode == FS_OPCODE_RCP ||
		   scan_inst->opcode == FS_OPCODE_RSQ ||
		   scan_inst->opcode == FS_OPCODE_SQRT ||
		   scan_inst->opcode == FS_OPCODE_EXP2 ||
		   scan_inst->opcode == FS_OPCODE_LOG2 ||
		   scan_inst->opcode == FS_OPCODE_SIN ||
		   scan_inst->opcode == FS_OPCODE_COS ||
		   scan_inst->opcode == FS_OPCODE_POW) {
		  break;
	       }
	    }

	    if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
	       /* Found the creator of our MRF's source value. */
	       scan_inst->dst.file = MRF;
	       scan_inst->dst.hw_reg = inst->dst.hw_reg;
	       scan_inst->saturate |= inst->saturate;
	       inst->remove();
	       progress = true;
	    }
	    break;
	 }

	 /* We don't handle flow control here.  Most computation of
	  * values that end up in MRFs are shortly before the MRF
	  * write anyway.
	  */
	 if (scan_inst->opcode == BRW_OPCODE_DO ||
	     scan_inst->opcode == BRW_OPCODE_WHILE ||
	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
	    break;
	 }

	 /* You can't read from an MRF, so if someone else reads our
	  * MRF's source GRF that we wanted to rewrite, that stops us.
	  */
	 bool interfered = false;
	 for (int i = 0; i < 3; i++) {
	    if (scan_inst->src[i].file == GRF &&
		scan_inst->src[i].reg == inst->src[0].reg &&
		scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
	       interfered = true;
	    }
	 }
	 if (interfered)
	    break;

	 if (scan_inst->dst.file == MRF &&
	     scan_inst->dst.hw_reg == inst->dst.hw_reg) {
	    /* Somebody else wrote our MRF here, so we can't can't
	     * compute-to-MRF before that.
	     */
	    break;
	 }

	 if (scan_inst->mlen > 0) {
	    /* Found a SEND instruction, which means that there are
	     * live values in MRFs from base_mrf to base_mrf +
	     * scan_inst->mlen - 1.  Don't go pushing our MRF write up
	     * above it.
	     */
	    if (inst->dst.hw_reg >= scan_inst->base_mrf &&
		inst->dst.hw_reg < scan_inst->base_mrf + scan_inst->mlen) {
	       break;
	    }
	 }
      }
   }

   return progress;
}

/**
 * Walks through basic blocks, looking for repeated MRF writes and
 * removing the later ones.
 */
bool
fs_visitor::remove_duplicate_mrf_writes()
{
   /* Last MOV known to have written each MRF, or NULL if unknown. */
   fs_inst *last_mrf_move[16];
   bool progress = false;

   memset(last_mrf_move, 0, sizeof(last_mrf_move));

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      switch (inst->opcode) {
      case BRW_OPCODE_DO:
      case BRW_OPCODE_WHILE:
      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_ENDIF:
	 /* Basic block boundary: forget everything we knew. */
	 memset(last_mrf_move, 0, sizeof(last_mrf_move));
	 continue;
      default:
	 break;
      }

      if (inst->opcode == BRW_OPCODE_MOV &&
	  inst->dst.file == MRF) {
	 fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
	 if (prev_inst && inst->equals(prev_inst)) {
	    inst->remove();
	    progress = true;
	    continue;
	 }
      }

      /* Clear out the last-write records for MRFs that were overwritten. */
      if (inst->dst.file == MRF) {
	 last_mrf_move[inst->dst.hw_reg] = NULL;
      }

      if (inst->mlen > 0) {
	 /* Found a SEND instruction, which will include two or fewer
	  * implied MRF writes.  We could do better here.
	  */
	 for (int i = 0; i < implied_mrf_writes(inst); i++) {
	    last_mrf_move[inst->base_mrf + i] = NULL;
	 }
      }

      /* Clear out any MRF move records whose sources got overwritten.
       */
      if (inst->dst.file == GRF) {
	 for (unsigned int i = 0; i < Elements(last_mrf_move); i++) {
	    if (last_mrf_move[i] &&
		last_mrf_move[i]->src[0].reg == inst->dst.reg) {
	       last_mrf_move[i] = NULL;
	    }
	 }
      }

      if (inst->opcode == BRW_OPCODE_MOV &&
	  inst->dst.file == MRF &&
	  inst->src[0].file == GRF &&
	  !inst->predicated) {
	 last_mrf_move[inst->dst.hw_reg] = inst;
      }
   }

   return progress;
}

/* Return true if the live intervals of virtual GRFs a and b overlap,
 * with special handling for dead (never-used) registers.
 */
bool
fs_visitor::virtual_grf_interferes(int a, int b)
{
   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);

   /* For dead code, just check if the def interferes with the other range. */
   if (this->virtual_grf_use[a] == -1) {
      return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
	      this->virtual_grf_def[a] < this->virtual_grf_use[b]);
   }
   if (this->virtual_grf_use[b] == -1) {
      return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
	      this->virtual_grf_def[b] < this->virtual_grf_use[a]);
   }

   return start < end;
}

/* Translate a register-allocated fs_reg into the brw_reg form the EU
 * emitter consumes, applying smear, type, abs, and negate.
 */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      if (reg->smear == -1) {
	 brw_reg = brw_vec8_reg(reg->file,
				reg->hw_reg, 0);
      } else {
	 /* A smeared reg replicates one component across the channels. */
	 brw_reg = brw_vec1_reg(reg->file,
				reg->hw_reg, reg->smear);
      }
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
	 brw_reg = brw_imm_f(reg->imm.f);
	 break;
      case BRW_REGISTER_TYPE_D:
	 brw_reg = brw_imm_d(reg->imm.i);
	 break;
      case BRW_REGISTER_TYPE_UD:
	 brw_reg = brw_imm_ud(reg->imm.u);
	 break;
      default:
	 assert(!"not reached");
	 brw_reg = brw_null_reg();
	 break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      /* Uniforms should have been rewritten by assign_curb_setup() or
       * setup_pull_constants() before code generation.
       */
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}

/* Walk the FS IR and emit native gen instructions for each fs_inst,
 * tracking IF/loop nesting so gen4-style jump targets can be patched.
 */
void
fs_visitor::generate_code()
{
   int last_native_inst = 0;
   struct brw_instruction *if_stack[16], *loop_stack[16];
   int if_stack_depth = 0, loop_stack_depth = 0;
   /* Number of open IFs inside the current loop, for BREAK/CONT patching. */
   int if_depth_in_loop[16];
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("Native code for fragment shader %d:\n",
	     ctx->Shader.CurrentFragmentProgram->Name);
   }

   if_depth_in_loop[loop_stack_depth] = 0;

   memset(&if_stack, 0, sizeof(if_stack));
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();
      struct brw_reg src[3], dst;

      /* Print the IR node / annotation once per run of instructions. */
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 if (last_annotation_ir != inst->ir) {
	    last_annotation_ir = inst->ir;
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != inst->annotation) {
	    last_annotation_string = inst->annotation;
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
      }

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDE:
	 brw_RNDE(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_NOT:
	 brw_NOT(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ASR:
	 brw_ASR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHR:
	 brw_SHR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHL:
	 brw_SHL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 assert(if_stack_depth < 16);
	 /* A valid src[0] marks the gen6 form carrying its own compare. */
	 if (inst->src[0].file != BAD_FILE) {
	    assert(intel->gen >= 6);
	    if_stack[if_stack_depth] = brw_IF_gen6(p, inst->conditional_mod, src[0], src[1]);
	 } else {
	    if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
	 }
	 if_depth_in_loop[loop_stack_depth]++;
	 if_stack_depth++;
	 break;

      case BRW_OPCODE_ELSE:
	 if_stack[if_stack_depth - 1] =
	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
	 break;
      case BRW_OPCODE_ENDIF:
	 if_stack_depth--;
	 brw_ENDIF(p , if_stack[if_stack_depth]);
	 if_depth_in_loop[loop_stack_depth]--;
	 break;

      case BRW_OPCODE_DO:
	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
	 if_depth_in_loop[loop_stack_depth] = 0;
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 /* FINISHME: We need to write the loop instruction support still. */
	 if (intel->gen >= 6)
	    brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]);
	 else
	    brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE: {
	 struct brw_instruction *inst0, *inst1;
	 GLuint br = 1;

	 /* Jump counts are in 64-bit units on gen5+, 128-bit before. */
	 if (intel->gen >= 5)
	    br = 2;

	 assert(loop_stack_depth > 0);
	 loop_stack_depth--;
	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
	 if (intel->gen < 6) {
	    /* patch all the BREAK/CONT instructions from last BGNLOOP */
	    while (inst0 > loop_stack[loop_stack_depth]) {
	       inst0--;
	       if (inst0->header.opcode == BRW_OPCODE_BREAK &&
		   inst0->bits3.if_else.jump_count == 0) {
		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
	       }
	       else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
			inst0->bits3.if_else.jump_count == 0) {
		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
	       }
	    }
	 }
      }
	 break;

      case FS_OPCODE_RCP:
      case FS_OPCODE_RSQ:
      case FS_OPCODE_SQRT:
      case FS_OPCODE_EXP2:
      case FS_OPCODE_LOG2:
      case FS_OPCODE_POW:
      case FS_OPCODE_SIN:
      case FS_OPCODE_COS:
	 generate_math(inst, dst, src);
	 break;
      case FS_OPCODE_LINTERP:
	 generate_linterp(inst, dst, src);
	 break;
      case FS_OPCODE_TEX:
      case FS_OPCODE_TXB:
      case FS_OPCODE_TXL:
	 generate_tex(inst, dst);
	 break;
      case FS_OPCODE_DISCARD_NOT:
	 generate_discard_not(inst, dst);
	 break;
      case FS_OPCODE_DISCARD_AND:
	 generate_discard_and(inst, src[0]);
	 break;
      case FS_OPCODE_DDX:
	 generate_ddx(inst, dst, src[0]);
	 break;
      case FS_OPCODE_DDY:
	 generate_ddy(inst, dst, src[0]);
	 break;

      case FS_OPCODE_SPILL:
	 generate_spill(inst, src[0]);
	 break;

      case FS_OPCODE_UNSPILL:
	 generate_unspill(inst, dst);
	 break;

      case FS_OPCODE_PULL_CONSTANT_LOAD:
	 generate_pull_constant_load(inst, dst);
	 break;

      case FS_OPCODE_FB_WRITE:
	 generate_fb_write(inst);
	 break;
      default:
	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
			  brw_opcodes[inst->opcode].name);
	 } else {
	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
	 }
	 this->fail = true;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
	    if (0) {
	       printf("0x%08x 0x%08x 0x%08x 0x%08x ",
		      ((uint32_t *)&p->store[i])[3],
		      ((uint32_t *)&p->store[i])[2],
		      ((uint32_t *)&p->store[i])[1],
		      ((uint32_t *)&p->store[i])[0]);
	    }
	    brw_disasm(stdout, &p->store[i], intel->gen);
	 }
      }

      last_native_inst = p->nr_insn;
   }

   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug.  So this is here in
    * case you're doing that.
3537 */ 3538 if (0) { 3539 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3540 for (unsigned int i = 0; i < p->nr_insn; i++) { 3541 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 3542 ((uint32_t *)&p->store[i])[3], 3543 ((uint32_t *)&p->store[i])[2], 3544 ((uint32_t *)&p->store[i])[1], 3545 ((uint32_t *)&p->store[i])[0]); 3546 brw_disasm(stdout, &p->store[i], intel->gen); 3547 } 3548 } 3549 } 3550} 3551 3552GLboolean 3553brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 3554{ 3555 struct intel_context *intel = &brw->intel; 3556 struct gl_context *ctx = &intel->ctx; 3557 struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; 3558 3559 if (!prog) 3560 return GL_FALSE; 3561 3562 struct brw_shader *shader = 3563 (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 3564 if (!shader) 3565 return GL_FALSE; 3566 3567 /* We always use 8-wide mode, at least for now. For one, flow 3568 * control only works in 8-wide. Also, when we're fragment shader 3569 * bound, we're almost always under register pressure as well, so 3570 * 8-wide would save us from the performance cliff of spilling 3571 * regs. 3572 */ 3573 c->dispatch_width = 8; 3574 3575 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3576 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 3577 _mesa_print_ir(shader->ir, NULL); 3578 printf("\n"); 3579 } 3580 3581 /* Now the main event: Visit the shader IR and generate our FS IR for it. 3582 */ 3583 fs_visitor v(c, shader); 3584 3585 if (0) { 3586 v.emit_dummy_fs(); 3587 } else { 3588 v.calculate_urb_setup(); 3589 if (intel->gen < 6) 3590 v.emit_interpolation_setup_gen4(); 3591 else 3592 v.emit_interpolation_setup_gen6(); 3593 3594 /* Generate FS IR for main(). (the visitor only descends into 3595 * functions called "main"). 
3596 */ 3597 foreach_iter(exec_list_iterator, iter, *shader->ir) { 3598 ir_instruction *ir = (ir_instruction *)iter.get(); 3599 v.base_ir = ir; 3600 ir->accept(&v); 3601 } 3602 3603 v.emit_fb_writes(); 3604 3605 v.split_virtual_grfs(); 3606 v.setup_pull_constants(); 3607 3608 v.assign_curb_setup(); 3609 v.assign_urb_setup(); 3610 3611 bool progress; 3612 do { 3613 progress = false; 3614 3615 progress = v.remove_duplicate_mrf_writes() || progress; 3616 3617 v.calculate_live_intervals(); 3618 progress = v.propagate_constants() || progress; 3619 progress = v.register_coalesce() || progress; 3620 progress = v.compute_to_mrf() || progress; 3621 progress = v.dead_code_eliminate() || progress; 3622 } while (progress); 3623 3624 if (0) { 3625 /* Debug of register spilling: Go spill everything. */ 3626 int virtual_grf_count = v.virtual_grf_next; 3627 for (int i = 1; i < virtual_grf_count; i++) { 3628 v.spill_reg(i); 3629 } 3630 v.calculate_live_intervals(); 3631 } 3632 3633 if (0) 3634 v.assign_regs_trivial(); 3635 else { 3636 while (!v.assign_regs()) { 3637 if (v.fail) 3638 break; 3639 3640 v.calculate_live_intervals(); 3641 } 3642 } 3643 } 3644 3645 if (!v.fail) 3646 v.generate_code(); 3647 3648 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */ 3649 3650 if (v.fail) 3651 return GL_FALSE; 3652 3653 c->prog_data.total_grf = v.grf_used; 3654 3655 return GL_TRUE; 3656} 3657