brw_fs.cpp revision 0cadd32b6dc80455802c04b479ec8e768f93ffe1
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44#include "talloc.h" 45} 46#include "brw_fs.h" 47#include "../glsl/glsl_types.h" 48#include "../glsl/ir_optimization.h" 49#include "../glsl/ir_print_visitor.h" 50 51static int using_new_fs = -1; 52static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 53 54struct gl_shader * 55brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) 56{ 57 struct brw_shader *shader; 58 59 shader = talloc_zero(NULL, struct brw_shader); 60 if (shader) { 61 shader->base.Type = type; 62 shader->base.Name = name; 63 _mesa_init_shader(ctx, &shader->base); 64 } 65 66 return &shader->base; 67} 68 69struct gl_shader_program * 70brw_new_shader_program(GLcontext *ctx, GLuint name) 71{ 72 struct brw_shader_program *prog; 73 prog = talloc_zero(NULL, struct brw_shader_program); 74 if (prog) { 75 prog->base.Name = name; 76 _mesa_init_shader_program(ctx, &prog->base); 77 } 78 return &prog->base; 79} 80 81GLboolean 82brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) 83{ 84 if (!_mesa_ir_compile_shader(ctx, shader)) 85 return GL_FALSE; 86 87 return GL_TRUE; 88} 89 90GLboolean 91brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) 92{ 93 struct intel_context *intel = intel_context(ctx); 94 if (using_new_fs == -1) 95 using_new_fs = getenv("INTEL_NEW_FS") != NULL; 96 97 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { 98 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; 99 100 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) { 101 void *mem_ctx = 
talloc_new(NULL); 102 bool progress; 103 104 if (shader->ir) 105 talloc_free(shader->ir); 106 shader->ir = new(shader) exec_list; 107 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 108 109 do_mat_op_to_vec(shader->ir); 110 do_mod_to_fract(shader->ir); 111 do_div_to_mul_rcp(shader->ir); 112 do_sub_to_add_neg(shader->ir); 113 do_explog_to_explog2(shader->ir); 114 do_lower_texture_projection(shader->ir); 115 brw_do_cubemap_normalize(shader->ir); 116 117 do { 118 progress = false; 119 120 brw_do_channel_expressions(shader->ir); 121 brw_do_vector_splitting(shader->ir); 122 123 progress = do_lower_jumps(shader->ir, true, true, 124 true, /* main return */ 125 false, /* continue */ 126 false /* loops */ 127 ) || progress; 128 129 progress = do_common_optimization(shader->ir, true, 32) || progress; 130 131 progress = lower_noise(shader->ir) || progress; 132 progress = 133 lower_variable_index_to_cond_assign(shader->ir, 134 GL_TRUE, /* input */ 135 GL_TRUE, /* output */ 136 GL_TRUE, /* temp */ 137 GL_TRUE /* uniform */ 138 ) || progress; 139 if (intel->gen == 6) { 140 progress = do_if_to_cond_assign(shader->ir) || progress; 141 } 142 } while (progress); 143 144 validate_ir_tree(shader->ir); 145 146 reparent_ir(shader->ir, shader->ir); 147 talloc_free(mem_ctx); 148 } 149 } 150 151 if (!_mesa_ir_link_shader(ctx, prog)) 152 return GL_FALSE; 153 154 return GL_TRUE; 155} 156 157static int 158type_size(const struct glsl_type *type) 159{ 160 unsigned int size, i; 161 162 switch (type->base_type) { 163 case GLSL_TYPE_UINT: 164 case GLSL_TYPE_INT: 165 case GLSL_TYPE_FLOAT: 166 case GLSL_TYPE_BOOL: 167 return type->components(); 168 case GLSL_TYPE_ARRAY: 169 return type_size(type->fields.array) * type->length; 170 case GLSL_TYPE_STRUCT: 171 size = 0; 172 for (i = 0; i < type->length; i++) { 173 size += type_size(type->fields.structure[i].type); 174 } 175 return size; 176 case GLSL_TYPE_SAMPLER: 177 /* Samplers take up no register space, since they're baked in at 178 * link time. 
179 */ 180 return 0; 181 default: 182 assert(!"not reached"); 183 return 0; 184 } 185} 186 187static const fs_reg reg_undef; 188static const fs_reg reg_null(ARF, BRW_ARF_NULL); 189 190int 191fs_visitor::virtual_grf_alloc(int size) 192{ 193 if (virtual_grf_array_size <= virtual_grf_next) { 194 if (virtual_grf_array_size == 0) 195 virtual_grf_array_size = 16; 196 else 197 virtual_grf_array_size *= 2; 198 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, 199 int, virtual_grf_array_size); 200 201 /* This slot is always unused. */ 202 virtual_grf_sizes[0] = 0; 203 } 204 virtual_grf_sizes[virtual_grf_next] = size; 205 return virtual_grf_next++; 206} 207 208/** Fixed HW reg constructor. */ 209fs_reg::fs_reg(enum register_file file, int hw_reg) 210{ 211 init(); 212 this->file = file; 213 this->hw_reg = hw_reg; 214 this->type = BRW_REGISTER_TYPE_F; 215} 216 217int 218brw_type_for_base_type(const struct glsl_type *type) 219{ 220 switch (type->base_type) { 221 case GLSL_TYPE_FLOAT: 222 return BRW_REGISTER_TYPE_F; 223 case GLSL_TYPE_INT: 224 case GLSL_TYPE_BOOL: 225 return BRW_REGISTER_TYPE_D; 226 case GLSL_TYPE_UINT: 227 return BRW_REGISTER_TYPE_UD; 228 case GLSL_TYPE_ARRAY: 229 case GLSL_TYPE_STRUCT: 230 /* These should be overridden with the type of the member when 231 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 232 * way to trip up if we don't. 233 */ 234 return BRW_REGISTER_TYPE_UD; 235 default: 236 assert(!"not reached"); 237 return BRW_REGISTER_TYPE_F; 238 } 239} 240 241/** Automatic reg constructor. 
*/ 242fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 243{ 244 init(); 245 246 this->file = GRF; 247 this->reg = v->virtual_grf_alloc(type_size(type)); 248 this->reg_offset = 0; 249 this->type = brw_type_for_base_type(type); 250} 251 252fs_reg * 253fs_visitor::variable_storage(ir_variable *var) 254{ 255 return (fs_reg *)hash_table_find(this->variable_ht, var); 256} 257 258/* Our support for uniforms is piggy-backed on the struct 259 * gl_fragment_program, because that's where the values actually 260 * get stored, rather than in some global gl_shader_program uniform 261 * store. 262 */ 263int 264fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 265{ 266 unsigned int offset = 0; 267 float *vec_values; 268 269 if (type->is_matrix()) { 270 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 271 type->vector_elements, 272 1); 273 274 for (unsigned int i = 0; i < type->matrix_columns; i++) { 275 offset += setup_uniform_values(loc + offset, column); 276 } 277 278 return offset; 279 } 280 281 switch (type->base_type) { 282 case GLSL_TYPE_FLOAT: 283 case GLSL_TYPE_UINT: 284 case GLSL_TYPE_INT: 285 case GLSL_TYPE_BOOL: 286 vec_values = fp->Base.Parameters->ParameterValues[loc]; 287 for (unsigned int i = 0; i < type->vector_elements; i++) { 288 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 289 } 290 return 1; 291 292 case GLSL_TYPE_STRUCT: 293 for (unsigned int i = 0; i < type->length; i++) { 294 offset += setup_uniform_values(loc + offset, 295 type->fields.structure[i].type); 296 } 297 return offset; 298 299 case GLSL_TYPE_ARRAY: 300 for (unsigned int i = 0; i < type->length; i++) { 301 offset += setup_uniform_values(loc + offset, type->fields.array); 302 } 303 return offset; 304 305 case GLSL_TYPE_SAMPLER: 306 /* The sampler takes up a slot, but we don't use any values from it. 
*/ 307 return 1; 308 309 default: 310 assert(!"not reached"); 311 return 0; 312 } 313} 314 315 316/* Our support for builtin uniforms is even scarier than non-builtin. 317 * It sits on top of the PROG_STATE_VAR parameters that are 318 * automatically updated from GL context state. 319 */ 320void 321fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 322{ 323 const struct gl_builtin_uniform_desc *statevar = NULL; 324 325 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 326 statevar = &_mesa_builtin_uniform_desc[i]; 327 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 328 break; 329 } 330 331 if (!statevar->name) { 332 this->fail = true; 333 printf("Failed to find builtin uniform `%s'\n", ir->name); 334 return; 335 } 336 337 int array_count; 338 if (ir->type->is_array()) { 339 array_count = ir->type->length; 340 } else { 341 array_count = 1; 342 } 343 344 for (int a = 0; a < array_count; a++) { 345 for (unsigned int i = 0; i < statevar->num_elements; i++) { 346 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 347 int tokens[STATE_LENGTH]; 348 349 memcpy(tokens, element->tokens, sizeof(element->tokens)); 350 if (ir->type->is_array()) { 351 tokens[1] = a; 352 } 353 354 /* This state reference has already been setup by ir_to_mesa, 355 * but we'll get the same index back here. 356 */ 357 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 358 (gl_state_index *)tokens); 359 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 360 361 /* Add each of the unique swizzles of the element as a 362 * parameter. This'll end up matching the expected layout of 363 * the array/matrix/structure we're trying to fill in. 
364 */ 365 int last_swiz = -1; 366 for (unsigned int i = 0; i < 4; i++) { 367 int swiz = GET_SWZ(element->swizzle, i); 368 if (swiz == last_swiz) 369 break; 370 last_swiz = swiz; 371 372 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; 373 } 374 } 375 } 376} 377 378fs_reg * 379fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 380{ 381 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 382 fs_reg wpos = *reg; 383 fs_reg neg_y = this->pixel_y; 384 neg_y.negate = true; 385 386 /* gl_FragCoord.x */ 387 if (ir->pixel_center_integer) { 388 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 389 } else { 390 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 391 } 392 wpos.reg_offset++; 393 394 /* gl_FragCoord.y */ 395 if (ir->origin_upper_left && ir->pixel_center_integer) { 396 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 397 } else { 398 fs_reg pixel_y = this->pixel_y; 399 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 400 401 if (!ir->origin_upper_left) { 402 pixel_y.negate = true; 403 offset += c->key.drawable_height - 1.0; 404 } 405 406 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 407 } 408 wpos.reg_offset++; 409 410 /* gl_FragCoord.z */ 411 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 412 interp_reg(FRAG_ATTRIB_WPOS, 2))); 413 wpos.reg_offset++; 414 415 /* gl_FragCoord.w: Already set up in emit_interpolation */ 416 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 417 418 return reg; 419} 420 421fs_reg * 422fs_visitor::emit_general_interpolation(ir_variable *ir) 423{ 424 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 425 /* Interpolation is always in floating point regs. 
*/ 426 reg->type = BRW_REGISTER_TYPE_F; 427 fs_reg attr = *reg; 428 429 unsigned int array_elements; 430 const glsl_type *type; 431 432 if (ir->type->is_array()) { 433 array_elements = ir->type->length; 434 if (array_elements == 0) { 435 this->fail = true; 436 } 437 type = ir->type->fields.array; 438 } else { 439 array_elements = 1; 440 type = ir->type; 441 } 442 443 int location = ir->location; 444 for (unsigned int i = 0; i < array_elements; i++) { 445 for (unsigned int j = 0; j < type->matrix_columns; j++) { 446 if (urb_setup[location] == -1) { 447 /* If there's no incoming setup data for this slot, don't 448 * emit interpolation for it. 449 */ 450 attr.reg_offset += type->vector_elements; 451 location++; 452 continue; 453 } 454 455 for (unsigned int c = 0; c < type->vector_elements; c++) { 456 struct brw_reg interp = interp_reg(location, c); 457 emit(fs_inst(FS_OPCODE_LINTERP, 458 attr, 459 this->delta_x, 460 this->delta_y, 461 fs_reg(interp))); 462 attr.reg_offset++; 463 } 464 465 if (intel->gen < 6) { 466 attr.reg_offset -= type->vector_elements; 467 for (unsigned int c = 0; c < type->vector_elements; c++) { 468 emit(fs_inst(BRW_OPCODE_MUL, 469 attr, 470 attr, 471 this->pixel_w)); 472 attr.reg_offset++; 473 } 474 } 475 location++; 476 } 477 } 478 479 return reg; 480} 481 482fs_reg * 483fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) 484{ 485 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 486 487 /* The frontfacing comes in as a bit in the thread payload. 
*/ 488 if (intel->gen >= 6) { 489 emit(fs_inst(BRW_OPCODE_ASR, 490 *reg, 491 fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), 492 fs_reg(15))); 493 emit(fs_inst(BRW_OPCODE_NOT, 494 *reg, 495 *reg)); 496 emit(fs_inst(BRW_OPCODE_AND, 497 *reg, 498 *reg, 499 fs_reg(1))); 500 } else { 501 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 502 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 503 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 504 * us front face 505 */ 506 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 507 *reg, 508 fs_reg(r1_6ud), 509 fs_reg(1u << 31))); 510 inst->conditional_mod = BRW_CONDITIONAL_L; 511 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 512 } 513 514 return reg; 515} 516 517fs_inst * 518fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) 519{ 520 switch (opcode) { 521 case FS_OPCODE_RCP: 522 case FS_OPCODE_RSQ: 523 case FS_OPCODE_SQRT: 524 case FS_OPCODE_EXP2: 525 case FS_OPCODE_LOG2: 526 case FS_OPCODE_SIN: 527 case FS_OPCODE_COS: 528 break; 529 default: 530 assert(!"not reached: bad math opcode"); 531 return NULL; 532 } 533 534 /* Can't do hstride == 0 args to gen6 math, so expand it out. We 535 * might be able to do better by doing execsize = 1 math and then 536 * expanding that result out, but we would need to be careful with 537 * masking. 
538 */ 539 if (intel->gen >= 6 && src.file == UNIFORM) { 540 fs_reg expanded = fs_reg(this, glsl_type::float_type); 541 emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); 542 src = expanded; 543 } 544 545 fs_inst *inst = emit(fs_inst(opcode, dst, src)); 546 547 if (intel->gen < 6) { 548 inst->base_mrf = 2; 549 inst->mlen = 1; 550 } 551 552 return inst; 553} 554 555fs_inst * 556fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) 557{ 558 int base_mrf = 2; 559 fs_inst *inst; 560 561 assert(opcode == FS_OPCODE_POW); 562 563 if (intel->gen >= 6) { 564 /* Can't do hstride == 0 args to gen6 math, so expand it out. */ 565 if (src0.file == UNIFORM) { 566 fs_reg expanded = fs_reg(this, glsl_type::float_type); 567 emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); 568 src0 = expanded; 569 } 570 571 if (src1.file == UNIFORM) { 572 fs_reg expanded = fs_reg(this, glsl_type::float_type); 573 emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); 574 src1 = expanded; 575 } 576 577 inst = emit(fs_inst(opcode, dst, src0, src1)); 578 } else { 579 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1)); 580 inst = emit(fs_inst(opcode, dst, src0, reg_null)); 581 582 inst->base_mrf = base_mrf; 583 inst->mlen = 2; 584 } 585 return inst; 586} 587 588void 589fs_visitor::visit(ir_variable *ir) 590{ 591 fs_reg *reg = NULL; 592 593 if (variable_storage(ir)) 594 return; 595 596 if (strcmp(ir->name, "gl_FragColor") == 0) { 597 this->frag_color = ir; 598 } else if (strcmp(ir->name, "gl_FragData") == 0) { 599 this->frag_data = ir; 600 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 601 this->frag_depth = ir; 602 } 603 604 if (ir->mode == ir_var_in) { 605 if (!strcmp(ir->name, "gl_FragCoord")) { 606 reg = emit_fragcoord_interpolation(ir); 607 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 608 reg = emit_frontfacing_interpolation(ir); 609 } else { 610 reg = emit_general_interpolation(ir); 611 } 612 assert(reg); 613 hash_table_insert(this->variable_ht, reg, ir); 614 return; 
615 } 616 617 if (ir->mode == ir_var_uniform) { 618 int param_index = c->prog_data.nr_params; 619 620 if (!strncmp(ir->name, "gl_", 3)) { 621 setup_builtin_uniform_values(ir); 622 } else { 623 setup_uniform_values(ir->location, ir->type); 624 } 625 626 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 627 } 628 629 if (!reg) 630 reg = new(this->mem_ctx) fs_reg(this, ir->type); 631 632 hash_table_insert(this->variable_ht, reg, ir); 633} 634 635void 636fs_visitor::visit(ir_dereference_variable *ir) 637{ 638 fs_reg *reg = variable_storage(ir->var); 639 this->result = *reg; 640} 641 642void 643fs_visitor::visit(ir_dereference_record *ir) 644{ 645 const glsl_type *struct_type = ir->record->type; 646 647 ir->record->accept(this); 648 649 unsigned int offset = 0; 650 for (unsigned int i = 0; i < struct_type->length; i++) { 651 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 652 break; 653 offset += type_size(struct_type->fields.structure[i].type); 654 } 655 this->result.reg_offset += offset; 656 this->result.type = brw_type_for_base_type(ir->type); 657} 658 659void 660fs_visitor::visit(ir_dereference_array *ir) 661{ 662 ir_constant *index; 663 int element_size; 664 665 ir->array->accept(this); 666 index = ir->array_index->as_constant(); 667 668 element_size = type_size(ir->type); 669 this->result.type = brw_type_for_base_type(ir->type); 670 671 if (index) { 672 assert(this->result.file == UNIFORM || 673 (this->result.file == GRF && 674 this->result.reg != 0)); 675 this->result.reg_offset += index->value.i[0] * element_size; 676 } else { 677 assert(!"FINISHME: non-constant array element"); 678 } 679} 680 681void 682fs_visitor::visit(ir_expression *ir) 683{ 684 unsigned int operand; 685 fs_reg op[2], temp; 686 fs_reg result; 687 fs_inst *inst; 688 689 for (operand = 0; operand < ir->get_num_operands(); operand++) { 690 ir->operands[operand]->accept(this); 691 if (this->result.file == BAD_FILE) { 692 ir_print_visitor v; 693 printf("Failed to get tree 
for expression operand:\n"); 694 ir->operands[operand]->accept(&v); 695 this->fail = true; 696 } 697 op[operand] = this->result; 698 699 /* Matrix expression operands should have been broken down to vector 700 * operations already. 701 */ 702 assert(!ir->operands[operand]->type->is_matrix()); 703 /* And then those vector operands should have been broken down to scalar. 704 */ 705 assert(!ir->operands[operand]->type->is_vector()); 706 } 707 708 /* Storage for our result. If our result goes into an assignment, it will 709 * just get copy-propagated out, so no worries. 710 */ 711 this->result = fs_reg(this, ir->type); 712 713 switch (ir->operation) { 714 case ir_unop_logic_not: 715 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 716 break; 717 case ir_unop_neg: 718 op[0].negate = !op[0].negate; 719 this->result = op[0]; 720 break; 721 case ir_unop_abs: 722 op[0].abs = true; 723 this->result = op[0]; 724 break; 725 case ir_unop_sign: 726 temp = fs_reg(this, ir->type); 727 728 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 729 730 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 731 inst->conditional_mod = BRW_CONDITIONAL_G; 732 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 733 inst->predicated = true; 734 735 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 736 inst->conditional_mod = BRW_CONDITIONAL_L; 737 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 738 inst->predicated = true; 739 740 break; 741 case ir_unop_rcp: 742 emit_math(FS_OPCODE_RCP, this->result, op[0]); 743 break; 744 745 case ir_unop_exp2: 746 emit_math(FS_OPCODE_EXP2, this->result, op[0]); 747 break; 748 case ir_unop_log2: 749 emit_math(FS_OPCODE_LOG2, this->result, op[0]); 750 break; 751 case ir_unop_exp: 752 case ir_unop_log: 753 assert(!"not reached: should be handled by ir_explog_to_explog2"); 754 break; 755 case ir_unop_sin: 756 emit_math(FS_OPCODE_SIN, this->result, op[0]); 757 break; 758 
case ir_unop_cos: 759 emit_math(FS_OPCODE_COS, this->result, op[0]); 760 break; 761 762 case ir_unop_dFdx: 763 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 764 break; 765 case ir_unop_dFdy: 766 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 767 break; 768 769 case ir_binop_add: 770 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 771 break; 772 case ir_binop_sub: 773 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 774 break; 775 776 case ir_binop_mul: 777 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 778 break; 779 case ir_binop_div: 780 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 781 break; 782 case ir_binop_mod: 783 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 784 break; 785 786 case ir_binop_less: 787 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 788 inst->conditional_mod = BRW_CONDITIONAL_L; 789 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 790 break; 791 case ir_binop_greater: 792 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 793 inst->conditional_mod = BRW_CONDITIONAL_G; 794 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 795 break; 796 case ir_binop_lequal: 797 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 798 inst->conditional_mod = BRW_CONDITIONAL_LE; 799 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 800 break; 801 case ir_binop_gequal: 802 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 803 inst->conditional_mod = BRW_CONDITIONAL_GE; 804 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 805 break; 806 case ir_binop_equal: 807 case ir_binop_all_equal: /* same as nequal for scalars */ 808 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 809 inst->conditional_mod = BRW_CONDITIONAL_Z; 810 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 811 break; 812 
case ir_binop_nequal: 813 case ir_binop_any_nequal: /* same as nequal for scalars */ 814 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 815 inst->conditional_mod = BRW_CONDITIONAL_NZ; 816 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 817 break; 818 819 case ir_binop_logic_xor: 820 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 821 break; 822 823 case ir_binop_logic_or: 824 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 825 break; 826 827 case ir_binop_logic_and: 828 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 829 break; 830 831 case ir_binop_dot: 832 case ir_binop_cross: 833 case ir_unop_any: 834 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 835 break; 836 837 case ir_unop_noise: 838 assert(!"not reached: should be handled by lower_noise"); 839 break; 840 841 case ir_unop_sqrt: 842 emit_math(FS_OPCODE_SQRT, this->result, op[0]); 843 break; 844 845 case ir_unop_rsq: 846 emit_math(FS_OPCODE_RSQ, this->result, op[0]); 847 break; 848 849 case ir_unop_i2f: 850 case ir_unop_b2f: 851 case ir_unop_b2i: 852 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 853 break; 854 case ir_unop_f2i: 855 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 856 break; 857 case ir_unop_f2b: 858 case ir_unop_i2b: 859 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 860 inst->conditional_mod = BRW_CONDITIONAL_NZ; 861 862 case ir_unop_trunc: 863 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 864 break; 865 case ir_unop_ceil: 866 op[0].negate = ~op[0].negate; 867 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 868 this->result.negate = true; 869 break; 870 case ir_unop_floor: 871 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 872 break; 873 case ir_unop_fract: 874 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 875 break; 876 877 case ir_binop_min: 878 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 
879 inst->conditional_mod = BRW_CONDITIONAL_L; 880 881 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 882 inst->predicated = true; 883 break; 884 case ir_binop_max: 885 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 886 inst->conditional_mod = BRW_CONDITIONAL_G; 887 888 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 889 inst->predicated = true; 890 break; 891 892 case ir_binop_pow: 893 emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); 894 break; 895 896 case ir_unop_bit_not: 897 case ir_unop_u2f: 898 case ir_binop_lshift: 899 case ir_binop_rshift: 900 case ir_binop_bit_and: 901 case ir_binop_bit_xor: 902 case ir_binop_bit_or: 903 assert(!"GLSL 1.30 features unsupported"); 904 break; 905 } 906} 907 908void 909fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 910 const glsl_type *type, bool predicated) 911{ 912 switch (type->base_type) { 913 case GLSL_TYPE_FLOAT: 914 case GLSL_TYPE_UINT: 915 case GLSL_TYPE_INT: 916 case GLSL_TYPE_BOOL: 917 for (unsigned int i = 0; i < type->components(); i++) { 918 l.type = brw_type_for_base_type(type); 919 r.type = brw_type_for_base_type(type); 920 921 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 922 inst->predicated = predicated; 923 924 l.reg_offset++; 925 r.reg_offset++; 926 } 927 break; 928 case GLSL_TYPE_ARRAY: 929 for (unsigned int i = 0; i < type->length; i++) { 930 emit_assignment_writes(l, r, type->fields.array, predicated); 931 } 932 933 case GLSL_TYPE_STRUCT: 934 for (unsigned int i = 0; i < type->length; i++) { 935 emit_assignment_writes(l, r, type->fields.structure[i].type, 936 predicated); 937 } 938 break; 939 940 case GLSL_TYPE_SAMPLER: 941 break; 942 943 default: 944 assert(!"not reached"); 945 break; 946 } 947} 948 949void 950fs_visitor::visit(ir_assignment *ir) 951{ 952 struct fs_reg l, r; 953 fs_inst *inst; 954 955 /* FINISHME: arrays on the lhs */ 956 ir->lhs->accept(this); 957 l = this->result; 958 959 ir->rhs->accept(this); 960 r = 
this->result; 961 962 assert(l.file != BAD_FILE); 963 assert(r.file != BAD_FILE); 964 965 if (ir->condition) { 966 /* Get the condition bool into the predicate. */ 967 ir->condition->accept(this); 968 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0))); 969 inst->conditional_mod = BRW_CONDITIONAL_NZ; 970 } 971 972 if (ir->lhs->type->is_scalar() || 973 ir->lhs->type->is_vector()) { 974 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 975 if (ir->write_mask & (1 << i)) { 976 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 977 if (ir->condition) 978 inst->predicated = true; 979 r.reg_offset++; 980 } 981 l.reg_offset++; 982 } 983 } else { 984 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 985 } 986} 987 988fs_inst * 989fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 990{ 991 int mlen; 992 int base_mrf = 1; 993 bool simd16 = false; 994 fs_reg orig_dst; 995 996 /* g0 header. */ 997 mlen = 1; 998 999 if (ir->shadow_comparitor) { 1000 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1001 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1002 coordinate)); 1003 coordinate.reg_offset++; 1004 } 1005 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1006 mlen += 3; 1007 1008 if (ir->op == ir_tex) { 1009 /* There's no plain shadow compare message, so we use shadow 1010 * compare with a bias of 0.0. 
1011 */ 1012 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1013 fs_reg(0.0f))); 1014 mlen++; 1015 } else if (ir->op == ir_txb) { 1016 ir->lod_info.bias->accept(this); 1017 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1018 this->result)); 1019 mlen++; 1020 } else { 1021 assert(ir->op == ir_txl); 1022 ir->lod_info.lod->accept(this); 1023 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1024 this->result)); 1025 mlen++; 1026 } 1027 1028 ir->shadow_comparitor->accept(this); 1029 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1030 mlen++; 1031 } else if (ir->op == ir_tex) { 1032 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1033 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1034 coordinate)); 1035 coordinate.reg_offset++; 1036 } 1037 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1038 mlen += 3; 1039 } else { 1040 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 1041 * instructions. We'll need to do SIMD16 here. 1042 */ 1043 assert(ir->op == ir_txb || ir->op == ir_txl); 1044 1045 for (int i = 0; i < ir->coordinate->type->vector_elements * 2;) { 1046 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), 1047 coordinate)); 1048 coordinate.reg_offset++; 1049 } 1050 1051 /* lod/bias appears after u/v/r. */ 1052 mlen += 6; 1053 1054 if (ir->op == ir_txb) { 1055 ir->lod_info.bias->accept(this); 1056 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1057 this->result)); 1058 mlen++; 1059 } else { 1060 ir->lod_info.lod->accept(this); 1061 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1062 this->result)); 1063 mlen++; 1064 } 1065 1066 /* The unused upper half. */ 1067 mlen++; 1068 1069 /* Now, since we're doing simd16, the return is 2 interleaved 1070 * vec4s where the odd-indexed ones are junk. We'll need to move 1071 * this weirdness around to the expected layout. 
       */
      simd16 = true;
      orig_dst = dst;
      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
						       2));
      dst.type = BRW_REGISTER_TYPE_F;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst));
      break;
   case ir_txb:
      inst = emit(fs_inst(FS_OPCODE_TXB, dst));
      break;
   case ir_txl:
      inst = emit(fs_inst(FS_OPCODE_TXL, dst));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;

   if (simd16) {
      /* The SIMD16 result comes back as pairs of registers per channel;
       * copy the first half of each pair back to the caller's SIMD8 dst.
       */
      for (int i = 0; i < 4; i++) {
	 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
	 orig_dst.reg_offset++;
	 dst.reg_offset += 2;
      }
   }

   return inst;
}

/**
 * Emits the sampler message setup and send for a texture operation on
 * gen5+, returning the emitted sample instruction.
 */
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
    * optional parameters like shadow comparitor or LOD bias.  If
    * optional parameters aren't present, those base slots are
    * optional and don't need to be included in the message.
    *
    * We don't fill in the unnecessary slots regardless, which may
    * look surprising in the disassembly.
    */
   int mlen = 1; /* g0 header always present. */
   int base_mrf = 1;

   /* Copy the coordinate components into consecutive message registers. */
   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
		   coordinate));
      coordinate.reg_offset++;
   }
   mlen += ir->coordinate->type->vector_elements;

   if (ir->shadow_comparitor) {
      /* Skip ahead past any unused base slots so the comparitor lands in
       * its fixed position after u/v/r/array-index.
       */
      mlen = MAX2(mlen, 5);

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst));
      break;
   case ir_txb:
      /* LOD bias is an optional parameter following the base slots. */
      ir->lod_info.bias->accept(this);
      mlen = MAX2(mlen, 5);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst));
      break;
   case ir_txl:
      /* Explicit LOD occupies the same optional-parameter slot as bias. */
      ir->lod_info.lod->accept(this);
      mlen = MAX2(mlen, 5);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;

   return inst;
}

/**
 * Visits a texture-sample IR node: emits the hardware-specific sampler
 * message, resolves the sampler unit, and applies any swizzle requested
 * by the compile key.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   fs_inst *inst = NULL;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   if (intel->gen < 5) {
      inst = emit_texture_gen4(ir, dst, coordinate);
   } else {
      inst = emit_texture_gen5(ir, dst, coordinate);
   }

   /* Resolve the sampler uniform to the bound texture unit. */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
				      ctx->Shader.CurrentProgram,
				      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   /* Apply the per-sampler swizzle from the compile key, if any, by
    * copying channels (or 0.0/1.0 constants) into a new destination.
    */
   if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
      fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
	 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
	 fs_reg l = swizzle_dst;
	 l.reg_offset += i;

	 if (swiz == SWIZZLE_ZERO) {
	    emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
	 } else if (swiz == SWIZZLE_ONE) {
	    emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
	 } else {
	    fs_reg r = dst;
	    r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
	    emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 }
      }
      this->result = swizzle_dst;
   }
}

/**
 * Expands a swizzle into per-channel MOVs from the source's channels.
 *
 * A single-component swizzle is just an offset into the source value,
 * so no instructions are emitted for that case.
 */
void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
	 swiz = ir->mask.x;
	 break;
      case 1:
	 swiz = ir->mask.y;
	 break;
      case 2:
	 swiz = ir->mask.z;
	 break;
      case 3:
	 swiz = ir->mask.w;
	 break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

/**
 * Emits the instructions implementing a fragment discard.
 *
 * The NOT/AND pair inverts the active pixel mask and folds it into the
 * dispatched pixel state (see generate_discard_not/generate_discard_and).
 */
void
fs_visitor::visit(ir_discard *ir)
{
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   /* Conditional discard must have been lowered away before we get here. */
   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null));
   emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null, temp));
   kill_emitted = true;
}

/**
 * Loads a constant by emitting one immediate MOV per vector component.
 */
void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
	 break;
      case GLSL_TYPE_UINT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
	 break;
      case GLSL_TYPE_INT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
	 break;
      case GLSL_TYPE_BOOL:
	 /* Booleans are stored as integer 0/1. */
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
	 break;
      default:
	 assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

/**
 * Emits an if/else/endif sequence: the condition result is moved to the
 * null register with a NZ conditional mod to set the flag, and the IF is
 * predicated on that flag.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code.
    */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

/**
 * Emits a DO/WHILE loop, including the counter initialization, the
 * predicated BREAK implementing the loop condition, and the counter
 * increment, when the loop analysis provided them.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
	 /* Initialize the counter to its starting value. */
	 this->base_ir = ir->from;
	 ir->from->accept(this);

	 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      /* Compare the counter against the limit and BREAK (predicated on
       * the comparison result) when the loop condition is met.
       */
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
				   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;
      case ir_binop_nequal:
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;
      case ir_binop_gequal:
	 inst->conditional_mod = BRW_CONDITIONAL_GE;
	 break;
      case ir_binop_lequal:
	 inst->conditional_mod = BRW_CONDITIONAL_LE;
	 break;
      case ir_binop_greater:
	 inst->conditional_mod = BRW_CONDITIONAL_G;
	 break;
      case ir_binop_less:
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 break;
      default:
	 assert(!"not reached: unknown loop condition");
this->fail = true; 1391 break; 1392 } 1393 1394 inst = emit(fs_inst(BRW_OPCODE_BREAK)); 1395 inst->predicated = true; 1396 } 1397 1398 foreach_iter(exec_list_iterator, iter, ir->body_instructions) { 1399 ir_instruction *ir = (ir_instruction *)iter.get(); 1400 1401 this->base_ir = ir; 1402 ir->accept(this); 1403 } 1404 1405 if (ir->increment) { 1406 this->base_ir = ir->increment; 1407 ir->increment->accept(this); 1408 emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result)); 1409 } 1410 1411 emit(fs_inst(BRW_OPCODE_WHILE)); 1412} 1413 1414void 1415fs_visitor::visit(ir_loop_jump *ir) 1416{ 1417 switch (ir->mode) { 1418 case ir_loop_jump::jump_break: 1419 emit(fs_inst(BRW_OPCODE_BREAK)); 1420 break; 1421 case ir_loop_jump::jump_continue: 1422 emit(fs_inst(BRW_OPCODE_CONTINUE)); 1423 break; 1424 } 1425} 1426 1427void 1428fs_visitor::visit(ir_call *ir) 1429{ 1430 assert(!"FINISHME"); 1431} 1432 1433void 1434fs_visitor::visit(ir_return *ir) 1435{ 1436 assert(!"FINISHME"); 1437} 1438 1439void 1440fs_visitor::visit(ir_function *ir) 1441{ 1442 /* Ignore function bodies other than main() -- we shouldn't see calls to 1443 * them since they should all be inlined before we get to ir_to_mesa. 
1444 */ 1445 if (strcmp(ir->name, "main") == 0) { 1446 const ir_function_signature *sig; 1447 exec_list empty; 1448 1449 sig = ir->matching_signature(&empty); 1450 1451 assert(sig); 1452 1453 foreach_iter(exec_list_iterator, iter, sig->body) { 1454 ir_instruction *ir = (ir_instruction *)iter.get(); 1455 this->base_ir = ir; 1456 1457 ir->accept(this); 1458 } 1459 } 1460} 1461 1462void 1463fs_visitor::visit(ir_function_signature *ir) 1464{ 1465 assert(!"not reached"); 1466 (void)ir; 1467} 1468 1469fs_inst * 1470fs_visitor::emit(fs_inst inst) 1471{ 1472 fs_inst *list_inst = new(mem_ctx) fs_inst; 1473 *list_inst = inst; 1474 1475 list_inst->annotation = this->current_annotation; 1476 list_inst->ir = this->base_ir; 1477 1478 this->instructions.push_tail(list_inst); 1479 1480 return list_inst; 1481} 1482 1483/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1484void 1485fs_visitor::emit_dummy_fs() 1486{ 1487 /* Everyone's favorite color. */ 1488 emit(fs_inst(BRW_OPCODE_MOV, 1489 fs_reg(MRF, 2), 1490 fs_reg(1.0f))); 1491 emit(fs_inst(BRW_OPCODE_MOV, 1492 fs_reg(MRF, 3), 1493 fs_reg(0.0f))); 1494 emit(fs_inst(BRW_OPCODE_MOV, 1495 fs_reg(MRF, 4), 1496 fs_reg(1.0f))); 1497 emit(fs_inst(BRW_OPCODE_MOV, 1498 fs_reg(MRF, 5), 1499 fs_reg(0.0f))); 1500 1501 fs_inst *write; 1502 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1503 fs_reg(0), 1504 fs_reg(0))); 1505 write->base_mrf = 0; 1506} 1507 1508/* The register location here is relative to the start of the URB 1509 * data. It will get adjusted to be a real location before 1510 * generate_code() time. 1511 */ 1512struct brw_reg 1513fs_visitor::interp_reg(int location, int channel) 1514{ 1515 int regnr = urb_setup[location] * 2 + channel / 2; 1516 int stride = (channel & 1) * 4; 1517 1518 assert(urb_setup[location] != -1); 1519 1520 return brw_vec1_grf(regnr, stride); 1521} 1522 1523/** Emits the interpolation for the varying inputs. 
 */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* Add immediate per-channel offset vectors (brw_imm_v) to the subspan
    * origins held in g1 to produce each pixel's X/Y center.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      /* Keep delta_x/delta_y in one contiguous vec2 so they can land in
       * a register pair, which the PLN instruction requires (see
       * generate_linterp() and the aligned-pair class in assign_regs()).
       */
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_x,
		this->pixel_x,
		fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_y,
		this->pixel_y,
		fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
		interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
		int_pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		int_pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   /* As of gen6, we can no longer mix float and int sources.  We have
    * to turn the integer pixel centers into floats for their actual
    * use.
    */
   this->pixel_x = fs_reg(this, glsl_type::float_type);
   this->pixel_y = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x));
   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));

   this->current_annotation = "compute 1/pos.w";
   /* On gen6 the payload delivers W and the barycentric deltas directly;
    * the register numbers come from the compile key / fixed payload layout.
    */
   this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);

   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
   this->delta_y = fs_reg(brw_vec8_grf(3, 0));

   this->current_annotation = NULL;
}

/**
 * Assembles the MRF payload (header, optional AA stencil, color, optional
 * depth) and emits one FB write message per color region.
 */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   GLboolean header_present = GL_TRUE;
   int nr = 0;

   /* The header can be omitted on gen6 for the simple single-RT,
    * no-discard case.
    */
   if (intel->gen >= 6 &&
       !this->kill_emitted &&
       c->key.nr_color_regions == 1) {
      header_present = false;
   }

   if (header_present) {
      /* m0, m1 header */
      nr += 2;
   }

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
	 /* Hand over gl_FragDepth. */
	 assert(this->frag_depth);
	 fs_reg depth = *(variable_storage(this->frag_depth));

	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
	 /* Pass through the payload depth. */
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
						 "FB write target %d",
						 target);
      if (this->frag_color || this->frag_data) {
	 for (int i = 0; i < 4; i++) {
	    emit(fs_inst(BRW_OPCODE_MOV,
			 fs_reg(MRF, color_mrf + i),
			 color));
	    color.reg_offset++;
	 }
      }

      /* gl_FragColor broadcasts the same value to every target, so rewind;
       * gl_FragData keeps advancing to the next element.
       */
      if (this->frag_color)
	 color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->target = target;
      inst->base_mrf = 0;
      inst->mlen = nr;
      /* The last write carries the end-of-thread bit. */
      if (target == c->key.nr_color_regions - 1)
	 inst->eot = true;
      inst->header_present = header_present;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even with no color buffers bound, an EOT FB write is still emitted. */
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->base_mrf = 0;
      inst->mlen = nr;
      inst->eot = true;
      inst->header_present = header_present;
   }

   this->current_annotation = NULL;
}

/** Generates the actual FB write SEND for an FS_OPCODE_FB_WRITE IR op. */
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;
   struct brw_reg implied_header;

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
	 /* gen6 has no implied header move; copy g0 explicitly. */
	 brw_MOV(p,
		 brw_message_reg(inst->base_mrf),
		 brw_vec8_grf(0, 0));
	 implied_header = brw_null_reg();
      } else {
	 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }

      brw_MOV(p,
	      brw_message_reg(inst->base_mrf + 1),
	      brw_vec8_grf(1, 0));
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
		8, /* dispatch_width */
		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
		inst->base_mrf,
		implied_header,
		inst->target,
		inst->mlen,
		0,
		eot);
}

/**
 * Generates linear interpolation: PLN when the hardware has it and the
 * deltas landed in a usable register pair, otherwise LINE+MAC.
 */
void
fs_visitor::generate_linterp(fs_inst *inst,
			     struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

/** Generates a math-unit operation for an FS_OPCODE_* math IR op. */
void
fs_visitor::generate_math(fs_inst *inst,
			  struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   if (intel->gen >= 6) {
      /* gen6 math operates directly on register sources, no message. */
      assert(inst->mlen == 0);

      if (inst->opcode == FS_OPCODE_POW) {
	 brw_math2(p, dst, op, src[0], src[1]);
      } else {
	 brw_math(p, dst,
		  op,
		  inst->saturate ? BRW_MATH_SATURATE_SATURATE :
		  BRW_MATH_SATURATE_NONE,
		  0, src[0],
		  BRW_MATH_DATA_VECTOR,
		  BRW_MATH_PRECISION_FULL);
      }
   } else {
      /* Pre-gen6 math is a message to the shared math unit. */
      assert(inst->mlen >= 1);

      brw_math(p, dst,
	       op,
	       inst->saturate ? BRW_MATH_SATURATE_SATURATE :
	       BRW_MATH_SATURATE_NONE,
	       inst->base_mrf, src[0],
	       BRW_MATH_DATA_VECTOR,
	       BRW_MATH_PRECISION_FULL);
   }
}

/** Generates the sampler SEND for a texture IR op. */
void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

   if (intel->gen >= 5) {
      /* NOTE(review): FS_OPCODE_TXL is emitted by emit_texture_gen5() but
       * has no case here, so msg_type stays -1 and the assert below fires;
       * a SAMPLE_LOD message type looks to be needed -- confirm.
       */
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
	 }
	 break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
	 /* Note that G45 and older determines shadow compare and dispatch width
	  * from message length for most messages.
	  */
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 5);
	 } else {
	    assert(inst->mlen <= 6);
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 5);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
	 } else {
	    /* Bias-only sampling pre-gen5 uses the SIMD16 message. */
	    assert(inst->mlen == 8);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 }
	 break;
      }
   }
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      rlen = 8;
      dst = vec16(dst);
   }

   brw_SAMPLE(p,
	      retype(dst, BRW_REGISTER_TYPE_UW),
	      inst->base_mrf,
	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
	      SURF_INDEX_TEXTURE(inst->sampler),
	      inst->sampler,
	      WRITEMASK_XYZW,
	      msg_type,
	      rlen,
	      inst->mlen,
	      0,
	      1,
	      simd_mode);
}


/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *           DDX                     DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But for DDY, it's harder, as we want to produce the pairs swizzled
 * between each other.  We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/* See the comment above generate_ddx() for why this is only an
 * approximation of DDY.
 */
void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/** Computes the inverted dispatch mask (IMASK) into \p mask. */
void
fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
{
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
   brw_pop_insn_state(p);
}

/** ANDs the inverted mask into the pixel state in g0, killing channels. */
void
fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   mask = brw_uw1_reg(mask.file, mask.nr, 0);

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_AND(p, g0, mask, g0);
   brw_pop_insn_state(p);
}

/**
 * Replaces UNIFORM-file sources with the fixed hardware registers of the
 * constant buffer (CURBE) payload.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 1990 1991 /* Map the offsets in the UNIFORM file to fixed HW regs. */ 1992 foreach_iter(exec_list_iterator, iter, this->instructions) { 1993 fs_inst *inst = (fs_inst *)iter.get(); 1994 1995 for (unsigned int i = 0; i < 3; i++) { 1996 if (inst->src[i].file == UNIFORM) { 1997 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 1998 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + 1999 constant_nr / 8, 2000 constant_nr % 8); 2001 2002 inst->src[i].file = FIXED_HW_REG; 2003 inst->src[i].fixed_hw_reg = brw_reg; 2004 } 2005 } 2006 } 2007} 2008 2009void 2010fs_visitor::calculate_urb_setup() 2011{ 2012 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2013 urb_setup[i] = -1; 2014 } 2015 2016 int urb_next = 0; 2017 /* Figure out where each of the incoming setup attributes lands. */ 2018 if (intel->gen >= 6) { 2019 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2020 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) { 2021 urb_setup[i] = urb_next++; 2022 } 2023 } 2024 } else { 2025 /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */ 2026 for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) { 2027 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { 2028 int fp_index; 2029 2030 if (i >= VERT_RESULT_VAR0) 2031 fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); 2032 else if (i <= VERT_RESULT_TEX7) 2033 fp_index = i; 2034 else 2035 fp_index = -1; 2036 2037 if (fp_index >= 0) 2038 urb_setup[fp_index] = urb_next++; 2039 } 2040 } 2041 } 2042 2043 /* Each attribute is 4 setup channels, each of which is half a reg. 
*/ 2044 c->prog_data.urb_read_length = urb_next * 2; 2045} 2046 2047void 2048fs_visitor::assign_urb_setup() 2049{ 2050 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; 2051 2052 /* Offset all the urb_setup[] index by the actual position of the 2053 * setup regs, now that the location of the constants has been chosen. 2054 */ 2055 foreach_iter(exec_list_iterator, iter, this->instructions) { 2056 fs_inst *inst = (fs_inst *)iter.get(); 2057 2058 if (inst->opcode != FS_OPCODE_LINTERP) 2059 continue; 2060 2061 assert(inst->src[2].file == FIXED_HW_REG); 2062 2063 inst->src[2].fixed_hw_reg.nr += urb_start; 2064 } 2065 2066 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 2067} 2068 2069static void 2070assign_reg(int *reg_hw_locations, fs_reg *reg) 2071{ 2072 if (reg->file == GRF && reg->reg != 0) { 2073 reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset; 2074 reg->reg = 0; 2075 } 2076} 2077 2078void 2079fs_visitor::assign_regs_trivial() 2080{ 2081 int last_grf = 0; 2082 int hw_reg_mapping[this->virtual_grf_next]; 2083 int i; 2084 2085 hw_reg_mapping[0] = 0; 2086 hw_reg_mapping[1] = this->first_non_payload_grf; 2087 for (i = 2; i < this->virtual_grf_next; i++) { 2088 hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + 2089 this->virtual_grf_sizes[i - 1]); 2090 } 2091 last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1]; 2092 2093 foreach_iter(exec_list_iterator, iter, this->instructions) { 2094 fs_inst *inst = (fs_inst *)iter.get(); 2095 2096 assign_reg(hw_reg_mapping, &inst->dst); 2097 assign_reg(hw_reg_mapping, &inst->src[0]); 2098 assign_reg(hw_reg_mapping, &inst->src[1]); 2099 } 2100 2101 this->grf_used = last_grf + 1; 2102} 2103 2104void 2105fs_visitor::assign_regs() 2106{ 2107 int last_grf = 0; 2108 int hw_reg_mapping[this->virtual_grf_next + 1]; 2109 int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf; 2110 int class_sizes[base_reg_count]; 2111 int class_count = 0; 2112 int 
aligned_pair_class = -1; 2113 2114 /* Set up the register classes. 2115 * 2116 * The base registers store a scalar value. For texture samples, 2117 * we get virtual GRFs composed of 4 contiguous hw register. For 2118 * structures and arrays, we store them as contiguous larger things 2119 * than that, though we should be able to do better most of the 2120 * time. 2121 */ 2122 class_sizes[class_count++] = 1; 2123 if (brw->has_pln && intel->gen < 6) { 2124 /* Always set up the (unaligned) pairs for gen5, so we can find 2125 * them for making the aligned pair class. 2126 */ 2127 class_sizes[class_count++] = 2; 2128 } 2129 for (int r = 1; r < this->virtual_grf_next; r++) { 2130 int i; 2131 2132 for (i = 0; i < class_count; i++) { 2133 if (class_sizes[i] == this->virtual_grf_sizes[r]) 2134 break; 2135 } 2136 if (i == class_count) { 2137 if (this->virtual_grf_sizes[r] >= base_reg_count) { 2138 fprintf(stderr, "Object too large to register allocate.\n"); 2139 this->fail = true; 2140 } 2141 2142 class_sizes[class_count++] = this->virtual_grf_sizes[r]; 2143 } 2144 } 2145 2146 int ra_reg_count = 0; 2147 int class_base_reg[class_count]; 2148 int class_reg_count[class_count]; 2149 int classes[class_count + 1]; 2150 2151 for (int i = 0; i < class_count; i++) { 2152 class_base_reg[i] = ra_reg_count; 2153 class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); 2154 ra_reg_count += class_reg_count[i]; 2155 } 2156 2157 struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); 2158 for (int i = 0; i < class_count; i++) { 2159 classes[i] = ra_alloc_reg_class(regs); 2160 2161 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { 2162 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); 2163 } 2164 2165 /* Add conflicts between our contiguous registers aliasing 2166 * base regs and other register classes' contiguous registers 2167 * that alias base regs, or the base regs themselves for classes[0]. 
2168 */ 2169 for (int c = 0; c <= i; c++) { 2170 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { 2171 for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); 2172 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]); 2173 c_r++) { 2174 2175 if (0) { 2176 printf("%d/%d conflicts %d/%d\n", 2177 class_sizes[i], this->first_non_payload_grf + i_r, 2178 class_sizes[c], this->first_non_payload_grf + c_r); 2179 } 2180 2181 ra_add_reg_conflict(regs, 2182 class_base_reg[i] + i_r, 2183 class_base_reg[c] + c_r); 2184 } 2185 } 2186 } 2187 } 2188 2189 /* Add a special class for aligned pairs, which we'll put delta_x/y 2190 * in on gen5 so that we can do PLN. 2191 */ 2192 if (brw->has_pln && intel->gen < 6) { 2193 int reg_count = (base_reg_count - 1) / 2; 2194 int unaligned_pair_class = 1; 2195 assert(class_sizes[unaligned_pair_class] == 2); 2196 2197 aligned_pair_class = class_count; 2198 classes[aligned_pair_class] = ra_alloc_reg_class(regs); 2199 class_base_reg[aligned_pair_class] = 0; 2200 class_reg_count[aligned_pair_class] = 0; 2201 int start = (this->first_non_payload_grf & 1) ? 1 : 0; 2202 2203 for (int i = 0; i < reg_count; i++) { 2204 ra_class_add_reg(regs, classes[aligned_pair_class], 2205 class_base_reg[unaligned_pair_class] + i * 2 + start); 2206 } 2207 class_count++; 2208 } 2209 2210 ra_set_finalize(regs); 2211 2212 struct ra_graph *g = ra_alloc_interference_graph(regs, 2213 this->virtual_grf_next); 2214 /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 2215 * with nodes. 
2216 */ 2217 ra_set_node_class(g, 0, classes[0]); 2218 2219 for (int i = 1; i < this->virtual_grf_next; i++) { 2220 for (int c = 0; c < class_count; c++) { 2221 if (class_sizes[c] == this->virtual_grf_sizes[i]) { 2222 if (aligned_pair_class >= 0 && 2223 this->delta_x.reg == i) { 2224 ra_set_node_class(g, i, classes[aligned_pair_class]); 2225 } else { 2226 ra_set_node_class(g, i, classes[c]); 2227 } 2228 break; 2229 } 2230 } 2231 2232 for (int j = 1; j < i; j++) { 2233 if (virtual_grf_interferes(i, j)) { 2234 ra_add_node_interference(g, i, j); 2235 } 2236 } 2237 } 2238 2239 /* FINISHME: Handle spilling */ 2240 if (!ra_allocate_no_spills(g)) { 2241 fprintf(stderr, "Failed to allocate registers.\n"); 2242 this->fail = true; 2243 return; 2244 } 2245 2246 /* Get the chosen virtual registers for each node, and map virtual 2247 * regs in the register classes back down to real hardware reg 2248 * numbers. 2249 */ 2250 hw_reg_mapping[0] = 0; /* unused */ 2251 for (int i = 1; i < this->virtual_grf_next; i++) { 2252 int reg = ra_get_node_reg(g, i); 2253 int hw_reg = -1; 2254 2255 for (int c = 0; c < class_count; c++) { 2256 if (reg >= class_base_reg[c] && 2257 reg < class_base_reg[c] + class_reg_count[c]) { 2258 hw_reg = reg - class_base_reg[c]; 2259 break; 2260 } 2261 } 2262 2263 assert(hw_reg != -1); 2264 hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg; 2265 last_grf = MAX2(last_grf, 2266 hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); 2267 } 2268 2269 foreach_iter(exec_list_iterator, iter, this->instructions) { 2270 fs_inst *inst = (fs_inst *)iter.get(); 2271 2272 assign_reg(hw_reg_mapping, &inst->dst); 2273 assign_reg(hw_reg_mapping, &inst->src[0]); 2274 assign_reg(hw_reg_mapping, &inst->src[1]); 2275 } 2276 2277 this->grf_used = last_grf + 1; 2278 2279 talloc_free(g); 2280 talloc_free(regs); 2281} 2282 2283void 2284fs_visitor::calculate_live_intervals() 2285{ 2286 int num_vars = this->virtual_grf_next; 2287 int *def = talloc_array(mem_ctx, int, num_vars); 
2288 int *use = talloc_array(mem_ctx, int, num_vars); 2289 int loop_depth = 0; 2290 int loop_start = 0; 2291 2292 for (int i = 0; i < num_vars; i++) { 2293 def[i] = 1 << 30; 2294 use[i] = -1; 2295 } 2296 2297 int ip = 0; 2298 foreach_iter(exec_list_iterator, iter, this->instructions) { 2299 fs_inst *inst = (fs_inst *)iter.get(); 2300 2301 if (inst->opcode == BRW_OPCODE_DO) { 2302 if (loop_depth++ == 0) 2303 loop_start = ip; 2304 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2305 loop_depth--; 2306 2307 if (loop_depth == 0) { 2308 /* FINISHME: 2309 * 2310 * Patches up any vars marked for use within the loop as 2311 * live until the end. This is conservative, as there 2312 * will often be variables defined and used inside the 2313 * loop but dead at the end of the loop body. 2314 */ 2315 for (int i = 0; i < num_vars; i++) { 2316 if (use[i] == loop_start) { 2317 use[i] = ip; 2318 } 2319 } 2320 } 2321 } else { 2322 int eip = ip; 2323 2324 if (loop_depth) 2325 eip = loop_start; 2326 2327 for (unsigned int i = 0; i < 3; i++) { 2328 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2329 use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip); 2330 } 2331 } 2332 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2333 def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip); 2334 } 2335 } 2336 2337 ip++; 2338 } 2339 2340 talloc_free(this->virtual_grf_def); 2341 talloc_free(this->virtual_grf_use); 2342 this->virtual_grf_def = def; 2343 this->virtual_grf_use = use; 2344} 2345 2346/** 2347 * Attempts to move immediate constants into the immediate 2348 * constant slot of following instructions. 
2349 * 2350 * Immediate constants are a bit tricky -- they have to be in the last 2351 * operand slot, you can't do abs/negate on them, 2352 */ 2353 2354bool 2355fs_visitor::propagate_constants() 2356{ 2357 bool progress = false; 2358 2359 foreach_iter(exec_list_iterator, iter, this->instructions) { 2360 fs_inst *inst = (fs_inst *)iter.get(); 2361 2362 if (inst->opcode != BRW_OPCODE_MOV || 2363 inst->predicated || 2364 inst->dst.file != GRF || inst->src[0].file != IMM || 2365 inst->dst.type != inst->src[0].type) 2366 continue; 2367 2368 /* Don't bother with cases where we should have had the 2369 * operation on the constant folded in GLSL already. 2370 */ 2371 if (inst->saturate) 2372 continue; 2373 2374 /* Found a move of a constant to a GRF. Find anything else using the GRF 2375 * before it's written, and replace it with the constant if we can. 2376 */ 2377 exec_list_iterator scan_iter = iter; 2378 scan_iter.next(); 2379 for (; scan_iter.has_next(); scan_iter.next()) { 2380 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2381 2382 if (scan_inst->opcode == BRW_OPCODE_DO || 2383 scan_inst->opcode == BRW_OPCODE_WHILE || 2384 scan_inst->opcode == BRW_OPCODE_ELSE || 2385 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2386 break; 2387 } 2388 2389 for (int i = 2; i >= 0; i--) { 2390 if (scan_inst->src[i].file != GRF || 2391 scan_inst->src[i].reg != inst->dst.reg || 2392 scan_inst->src[i].reg_offset != inst->dst.reg_offset) 2393 continue; 2394 2395 /* Don't bother with cases where we should have had the 2396 * operation on the constant folded in GLSL already. 
2397 */ 2398 if (scan_inst->src[i].negate || scan_inst->src[i].abs) 2399 continue; 2400 2401 switch (scan_inst->opcode) { 2402 case BRW_OPCODE_MOV: 2403 scan_inst->src[i] = inst->src[0]; 2404 progress = true; 2405 break; 2406 2407 case BRW_OPCODE_MUL: 2408 case BRW_OPCODE_ADD: 2409 if (i == 1) { 2410 scan_inst->src[i] = inst->src[0]; 2411 progress = true; 2412 } else if (i == 0 && scan_inst->src[1].file != IMM) { 2413 /* Fit this constant in by commuting the operands */ 2414 scan_inst->src[0] = scan_inst->src[1]; 2415 scan_inst->src[1] = inst->src[0]; 2416 } 2417 break; 2418 case BRW_OPCODE_CMP: 2419 if (i == 1) { 2420 scan_inst->src[i] = inst->src[0]; 2421 progress = true; 2422 } 2423 } 2424 } 2425 2426 if (scan_inst->dst.file == GRF && 2427 scan_inst->dst.reg == inst->dst.reg && 2428 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2429 scan_inst->opcode == FS_OPCODE_TEX)) { 2430 break; 2431 } 2432 } 2433 } 2434 2435 return progress; 2436} 2437/** 2438 * Must be called after calculate_live_intervales() to remove unused 2439 * writes to registers -- register allocation will fail otherwise 2440 * because something deffed but not used won't be considered to 2441 * interfere with other regs. 2442 */ 2443bool 2444fs_visitor::dead_code_eliminate() 2445{ 2446 bool progress = false; 2447 int num_vars = this->virtual_grf_next; 2448 bool dead[num_vars]; 2449 2450 for (int i = 0; i < num_vars; i++) { 2451 dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i]; 2452 2453 if (dead[i]) { 2454 /* Mark off its interval so it won't interfere with anything. 
	  */
	 this->virtual_grf_def[i] = -1;
	 this->virtual_grf_use[i] = -1;
      }
   }

   /* Drop every instruction that writes a dead GRF. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->dst.file == GRF && dead[inst->dst.reg]) {
	 inst->remove();
	 progress = true;
      }
   }

   return progress;
}

/**
 * Removes GRF-to-GRF MOVs by rewriting later readers of the MOV's
 * destination to read its source directly, then deleting the MOV.
 */
bool
fs_visitor::register_coalesce()
{
   bool progress = false;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      /* Only a plain, unpredicated, unsaturated MOV between GRFs of
       * the same type is a candidate.
       */
      if (inst->opcode != BRW_OPCODE_MOV ||
	  inst->predicated ||
	  inst->saturate ||
	  inst->dst.file != GRF || inst->src[0].file != GRF ||
	  inst->dst.type != inst->src[0].type)
	 continue;

      /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
       * them: check for no writes to either one until the exit of the
       * program.
       */
      bool interfered = false;
      exec_list_iterator scan_iter = iter;
      scan_iter.next();
      for (; scan_iter.has_next(); scan_iter.next()) {
	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();

	 /* Give up at control flow, and fast-forward the outer
	  * iterator past the scanned range -- any MOV inside it would
	  * hit the same barrier.  NOTE(review): unlike
	  * propagate_constants(), ELSE is not a barrier here; a scan
	  * crossing an ELSE still aborts at the matching ENDIF, which
	  * appears to make this safe -- confirm.
	  */
	 if (scan_inst->opcode == BRW_OPCODE_DO ||
	     scan_inst->opcode == BRW_OPCODE_WHILE ||
	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
	    interfered = true;
	    iter = scan_iter;
	    break;
	 }

	 /* A write to either register kills the equivalence.
	  * (FS_OPCODE_TEX writes several consecutive regs, so any
	  * reg_offset of its dst counts as a clobber.)
	  */
	 if (scan_inst->dst.file == GRF) {
	    if (scan_inst->dst.reg == inst->dst.reg &&
		(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
		 scan_inst->opcode == FS_OPCODE_TEX)) {
	       interfered = true;
	       break;
	    }
	    if (scan_inst->dst.reg == inst->src[0].reg &&
		(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
		 scan_inst->opcode == FS_OPCODE_TEX)) {
	       interfered = true;
	       break;
	    }
	 }
      }
      if (interfered) {
	 continue;
      }

      /* Rewrite the later usage to point at the source of the move to
       * be removed.
       */
      for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
	   scan_iter.next()) {
	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();

	 for (int i = 0; i < 3; i++) {
	    if (scan_inst->src[i].file == GRF &&
		scan_inst->src[i].reg == inst->dst.reg &&
		scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
	       scan_inst->src[i].reg = inst->src[0].reg;
	       scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
	       /* Fold the (already rejected-on-inst) modifiers of the
		* use on top of the source's modifiers.
		*/
	       scan_inst->src[i].abs |= inst->src[0].abs;
	       scan_inst->src[i].negate ^= inst->src[0].negate;
	    }
	 }
      }

      inst->remove();
      progress = true;
   }

   return progress;
}


/**
 * Rewrites the instruction that computes the source of a GRF-to-MRF
 * MOV to write straight into the MRF, removing the MOV.
 */
bool
fs_visitor::compute_to_mrf()
{
   bool progress = false;
   int next_ip = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      int ip = next_ip;
      next_ip++;

      if (inst->opcode != BRW_OPCODE_MOV ||
	  inst->predicated ||
	  inst->dst.file != MRF || inst->src[0].file != GRF ||
	  inst->dst.type != inst->src[0].type ||
	  inst->src[0].abs || inst->src[0].negate)
	 continue;

      /* Can't compute-to-MRF this GRF if someone else was going to
       * read it later.
       */
      if (this->virtual_grf_use[inst->src[0].reg] > ip)
	 continue;

      /* Found a move of a GRF to a MRF.  Let's see if we can go
       * rewrite the thing that made this GRF to write into the MRF.
       */
      bool found = false;
      fs_inst *scan_inst;
      for (scan_inst = (fs_inst *)inst->prev;
	   scan_inst->prev != NULL;
	   scan_inst = (fs_inst *)scan_inst->prev) {
	 /* We don't handle flow control here.  Most computation of
	  * values that end up in MRFs are shortly before the MRF
	  * write anyway.
	  */
	 if (scan_inst->opcode == BRW_OPCODE_DO ||
	     scan_inst->opcode == BRW_OPCODE_WHILE ||
	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
	    break;
	 }

	 /* You can't read from an MRF, so if someone else reads our
	  * MRF's source GRF that we wanted to rewrite, that stops us.
	  */
	 bool interfered = false;
	 for (int i = 0; i < 3; i++) {
	    if (scan_inst->src[i].file == GRF &&
		scan_inst->src[i].reg == inst->src[0].reg &&
		scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
	       interfered = true;
	    }
	 }
	 if (interfered)
	    break;

	 if (scan_inst->dst.file == MRF &&
	     scan_inst->dst.hw_reg == inst->dst.hw_reg) {
	    /* Somebody else wrote our MRF here, so we can't
	     * compute-to-MRF before that.
	     */
	    break;
	 }

	 if (scan_inst->mlen > 0) {
	    /* Found a SEND instruction, which will do some amount of
	     * implied write that may overwrite our MRF that we were
	     * hoping to compute-to-MRF somewhere above it.  Nothing
	     * we have implied-writes more than 2 MRFs from base_mrf,
	     * though.
	     */
	    int implied_write_len = MIN2(scan_inst->mlen, 2);
	    if (inst->dst.hw_reg >= scan_inst->base_mrf &&
		inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) {
	       break;
	    }
	 }

	 if (scan_inst->dst.file == GRF &&
	     scan_inst->dst.reg == inst->src[0].reg) {
	    /* Found the last thing to write our reg we want to turn
	     * into a compute-to-MRF.
	     */

	    if (scan_inst->opcode == FS_OPCODE_TEX) {
	       /* texturing writes several continuous regs, so we can't
		* compute-to-mrf that.
		*/
	       break;
	    }

	    /* If it's predicated, it (probably) didn't populate all
	     * the channels.
	     */
	    if (scan_inst->predicated)
	       break;

	    /* SEND instructions can't have MRF as a destination.
	     */
	    if (scan_inst->mlen)
	       break;

	    if (intel->gen >= 6) {
	       /* gen6 math instructions must have the destination be
		* GRF, so no compute-to-MRF for them.
		*/
	       if (scan_inst->opcode == FS_OPCODE_RCP ||
		   scan_inst->opcode == FS_OPCODE_RSQ ||
		   scan_inst->opcode == FS_OPCODE_SQRT ||
		   scan_inst->opcode == FS_OPCODE_EXP2 ||
		   scan_inst->opcode == FS_OPCODE_LOG2 ||
		   scan_inst->opcode == FS_OPCODE_SIN ||
		   scan_inst->opcode == FS_OPCODE_COS ||
		   scan_inst->opcode == FS_OPCODE_POW) {
		  break;
	       }
	    }

	    if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
	       /* Found the creator of our MRF's source value. */
	       found = true;
	       break;
	    }
	 }
      }
      if (found) {
	 /* Retarget the producer straight at the MRF and drop the MOV. */
	 scan_inst->dst.file = MRF;
	 scan_inst->dst.hw_reg = inst->dst.hw_reg;
	 scan_inst->saturate |= inst->saturate;
	 inst->remove();
	 progress = true;
      }
   }

   return progress;
}

/**
 * Returns true if the live intervals of virtual GRFs @a and @b
 * overlap, using the def/use ranges from calculate_live_intervals().
 */
bool
fs_visitor::virtual_grf_interferes(int a, int b)
{
   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);

   /* For dead code, just check if the def interferes with the other range.
    */
   if (this->virtual_grf_use[a] == -1) {
      return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
	      this->virtual_grf_def[a] < this->virtual_grf_use[b]);
   }
   if (this->virtual_grf_use[b] == -1) {
      return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
	      this->virtual_grf_def[b] < this->virtual_grf_use[a]);
   }

   return start < end;
}

/**
 * Translates an fs_reg into the brw_reg form the brw_eu assembler
 * consumes, applying type, abs and negate modifiers.
 */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      brw_reg = brw_vec8_reg(reg->file,
			     reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
	 brw_reg = brw_imm_f(reg->imm.f);
	 break;
      case BRW_REGISTER_TYPE_D:
	 brw_reg = brw_imm_d(reg->imm.i);
	 break;
      case BRW_REGISTER_TYPE_UD:
	 brw_reg = brw_imm_ud(reg->imm.u);
	 break;
      default:
	 assert(!"not reached");
	 break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused.
       */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      /* Uniforms are expected to have been lowered before codegen. */
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}

/**
 * Walks the fs_inst list and emits native BRW instructions through the
 * brw_eu assembler, tracking if/loop nesting and recording
 * per-native-instruction annotations for the debug disassembly dump.
 */
void
fs_visitor::generate_code()
{
   unsigned int annotation_len = 0;
   int last_native_inst = 0;
   /* Fixed-depth stacks of the IF/DO instructions awaiting back-patch. */
   struct brw_instruction *if_stack[16], *loop_stack[16];
   int if_stack_depth = 0, loop_stack_depth = 0;
   /* IF-nesting depth inside the current loop, needed by BREAK/CONT. */
   int if_depth_in_loop[16];

   if_depth_in_loop[loop_stack_depth] = 0;

   memset(&if_stack, 0, sizeof(if_stack));
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();
      struct brw_reg src[3], dst;

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_NOT:
	 brw_NOT(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ASR:
	 brw_ASR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHR:
	 brw_SHR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHL:
	 brw_SHL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 assert(if_stack_depth < 16);
	 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
	 if_depth_in_loop[loop_stack_depth]++;
	 if_stack_depth++;
	 break;
      case BRW_OPCODE_ELSE:
	 /* brw_ELSE back-patches the pending IF and returns the ELSE
	  * instruction for ENDIF to patch in turn.
	  */
	 if_stack[if_stack_depth - 1] =
	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
	 break;
      case BRW_OPCODE_ENDIF:
	 if_stack_depth--;
	 brw_ENDIF(p, if_stack[if_stack_depth]);
	 if_depth_in_loop[loop_stack_depth]--;
	 break;

      case BRW_OPCODE_DO:
	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
	 if_depth_in_loop[loop_stack_depth] = 0;
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE: {
	 struct brw_instruction *inst0, *inst1;
	 GLuint br = 1;

	 /* NOTE(review): br looks like the per-instruction jump-count
	  * scale factor, doubled on gen5+ -- confirm against the EU
	  * ISA docs.
	  */
	 if (intel->gen >= 5)
	    br = 2;

	 assert(loop_stack_depth > 0);
	 loop_stack_depth--;
	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
	 /* patch all the BREAK/CONT instructions from last BGNLOOP */
	 while (inst0 > loop_stack[loop_stack_depth]) {
	    inst0--;
	    /* jump_count == 0 marks a not-yet-patched BREAK/CONT. */
	    if (inst0->header.opcode == BRW_OPCODE_BREAK &&
		inst0->bits3.if_else.jump_count == 0) {
	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
	    }
	    else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
		     inst0->bits3.if_else.jump_count == 0) {
	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
	    }
	 }
      }
	 break;

      case FS_OPCODE_RCP:
      case FS_OPCODE_RSQ:
      case
FS_OPCODE_SQRT: 2887 case FS_OPCODE_EXP2: 2888 case FS_OPCODE_LOG2: 2889 case FS_OPCODE_POW: 2890 case FS_OPCODE_SIN: 2891 case FS_OPCODE_COS: 2892 generate_math(inst, dst, src); 2893 break; 2894 case FS_OPCODE_LINTERP: 2895 generate_linterp(inst, dst, src); 2896 break; 2897 case FS_OPCODE_TEX: 2898 case FS_OPCODE_TXB: 2899 case FS_OPCODE_TXL: 2900 generate_tex(inst, dst); 2901 break; 2902 case FS_OPCODE_DISCARD_NOT: 2903 generate_discard_not(inst, dst); 2904 break; 2905 case FS_OPCODE_DISCARD_AND: 2906 generate_discard_and(inst, src[0]); 2907 break; 2908 case FS_OPCODE_DDX: 2909 generate_ddx(inst, dst, src[0]); 2910 break; 2911 case FS_OPCODE_DDY: 2912 generate_ddy(inst, dst, src[0]); 2913 break; 2914 case FS_OPCODE_FB_WRITE: 2915 generate_fb_write(inst); 2916 break; 2917 default: 2918 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 2919 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 2920 brw_opcodes[inst->opcode].name); 2921 } else { 2922 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 2923 } 2924 this->fail = true; 2925 } 2926 2927 if (annotation_len < p->nr_insn) { 2928 annotation_len *= 2; 2929 if (annotation_len < 16) 2930 annotation_len = 16; 2931 2932 this->annotation_string = talloc_realloc(this->mem_ctx, 2933 annotation_string, 2934 const char *, 2935 annotation_len); 2936 this->annotation_ir = talloc_realloc(this->mem_ctx, 2937 annotation_ir, 2938 ir_instruction *, 2939 annotation_len); 2940 } 2941 2942 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 2943 this->annotation_string[i] = inst->annotation; 2944 this->annotation_ir[i] = inst->ir; 2945 } 2946 last_native_inst = p->nr_insn; 2947 } 2948} 2949 2950GLboolean 2951brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 2952{ 2953 struct brw_compile *p = &c->func; 2954 struct intel_context *intel = &brw->intel; 2955 GLcontext *ctx = &intel->ctx; 2956 struct brw_shader *shader = NULL; 2957 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 
2958 2959 if (!prog) 2960 return GL_FALSE; 2961 2962 if (!using_new_fs) 2963 return GL_FALSE; 2964 2965 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 2966 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 2967 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 2968 break; 2969 } 2970 } 2971 if (!shader) 2972 return GL_FALSE; 2973 2974 /* We always use 8-wide mode, at least for now. For one, flow 2975 * control only works in 8-wide. Also, when we're fragment shader 2976 * bound, we're almost always under register pressure as well, so 2977 * 8-wide would save us from the performance cliff of spilling 2978 * regs. 2979 */ 2980 c->dispatch_width = 8; 2981 2982 if (INTEL_DEBUG & DEBUG_WM) { 2983 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 2984 _mesa_print_ir(shader->ir, NULL); 2985 printf("\n"); 2986 } 2987 2988 /* Now the main event: Visit the shader IR and generate our FS IR for it. 2989 */ 2990 fs_visitor v(c, shader); 2991 2992 if (0) { 2993 v.emit_dummy_fs(); 2994 } else { 2995 v.calculate_urb_setup(); 2996 if (intel->gen < 6) 2997 v.emit_interpolation_setup_gen4(); 2998 else 2999 v.emit_interpolation_setup_gen6(); 3000 3001 /* Generate FS IR for main(). (the visitor only descends into 3002 * functions called "main"). 
3003 */ 3004 foreach_iter(exec_list_iterator, iter, *shader->ir) { 3005 ir_instruction *ir = (ir_instruction *)iter.get(); 3006 v.base_ir = ir; 3007 ir->accept(&v); 3008 } 3009 3010 v.emit_fb_writes(); 3011 v.assign_curb_setup(); 3012 v.assign_urb_setup(); 3013 3014 bool progress; 3015 do { 3016 progress = false; 3017 3018 v.calculate_live_intervals(); 3019 progress = v.propagate_constants() || progress; 3020 progress = v.register_coalesce() || progress; 3021 progress = v.compute_to_mrf() || progress; 3022 progress = v.dead_code_eliminate() || progress; 3023 } while (progress); 3024 3025 if (0) 3026 v.assign_regs_trivial(); 3027 else 3028 v.assign_regs(); 3029 } 3030 3031 if (!v.fail) 3032 v.generate_code(); 3033 3034 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */ 3035 3036 if (v.fail) 3037 return GL_FALSE; 3038 3039 if (INTEL_DEBUG & DEBUG_WM) { 3040 const char *last_annotation_string = NULL; 3041 ir_instruction *last_annotation_ir = NULL; 3042 3043 printf("Native code for fragment shader %d:\n", prog->Name); 3044 for (unsigned int i = 0; i < p->nr_insn; i++) { 3045 if (last_annotation_ir != v.annotation_ir[i]) { 3046 last_annotation_ir = v.annotation_ir[i]; 3047 if (last_annotation_ir) { 3048 printf(" "); 3049 last_annotation_ir->print(); 3050 printf("\n"); 3051 } 3052 } 3053 if (last_annotation_string != v.annotation_string[i]) { 3054 last_annotation_string = v.annotation_string[i]; 3055 if (last_annotation_string) 3056 printf(" %s\n", last_annotation_string); 3057 } 3058 brw_disasm(stdout, &p->store[i], intel->gen); 3059 } 3060 printf("\n"); 3061 } 3062 3063 c->prog_data.total_grf = v.grf_used; 3064 c->prog_data.total_scratch = 0; 3065 3066 return GL_TRUE; 3067} 3068