brw_fs.cpp revision 9935fe705df44bb633039ca74332cc0c126ccc30
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44#include "talloc.h" 45} 46#include "brw_fs.h" 47#include "../glsl/glsl_types.h" 48#include "../glsl/ir_optimization.h" 49#include "../glsl/ir_print_visitor.h" 50 51static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 52 53struct gl_shader * 54brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 55{ 56 struct brw_shader *shader; 57 58 shader = talloc_zero(NULL, struct brw_shader); 59 if (shader) { 60 shader->base.Type = type; 61 shader->base.Name = name; 62 _mesa_init_shader(ctx, &shader->base); 63 } 64 65 return &shader->base; 66} 67 68struct gl_shader_program * 69brw_new_shader_program(struct gl_context *ctx, GLuint name) 70{ 71 struct brw_shader_program *prog; 72 prog = talloc_zero(NULL, struct brw_shader_program); 73 if (prog) { 74 prog->base.Name = name; 75 _mesa_init_shader_program(ctx, &prog->base); 76 } 77 return &prog->base; 78} 79 80GLboolean 81brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) 82{ 83 if (!_mesa_ir_compile_shader(ctx, shader)) 84 return GL_FALSE; 85 86 return GL_TRUE; 87} 88 89GLboolean 90brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 91{ 92 struct brw_shader *shader = 93 (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 94 if (shader != NULL) { 95 void *mem_ctx = talloc_new(NULL); 96 bool progress; 97 98 if (shader->ir) 99 talloc_free(shader->ir); 100 shader->ir = new(shader) exec_list; 101 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 102 103 do_mat_op_to_vec(shader->ir); 104 do_mod_to_fract(shader->ir); 105 do_div_to_mul_rcp(shader->ir); 106 do_sub_to_add_neg(shader->ir); 107 do_explog_to_explog2(shader->ir); 108 do_lower_texture_projection(shader->ir); 109 brw_do_cubemap_normalize(shader->ir); 110 111 do { 112 progress = false; 113 114 brw_do_channel_expressions(shader->ir); 115 brw_do_vector_splitting(shader->ir); 116 117 progress = do_lower_jumps(shader->ir, true, true, 118 true, /* main return */ 119 false, /* continue */ 120 false /* loops */ 121 ) || progress; 122 123 progress = do_common_optimization(shader->ir, true, 32) || progress; 124 125 progress = lower_noise(shader->ir) || progress; 126 progress = 127 lower_variable_index_to_cond_assign(shader->ir, 128 GL_TRUE, /* input */ 129 GL_TRUE, /* output */ 130 GL_TRUE, /* temp */ 131 GL_TRUE /* uniform */ 132 ) || progress; 133 } while (progress); 134 135 validate_ir_tree(shader->ir); 136 137 reparent_ir(shader->ir, shader->ir); 138 talloc_free(mem_ctx); 139 } 140 141 if (!_mesa_ir_link_shader(ctx, prog)) 142 return GL_FALSE; 143 144 return GL_TRUE; 145} 146 147static int 148type_size(const struct glsl_type *type) 149{ 150 unsigned int size, i; 151 152 switch (type->base_type) { 153 case GLSL_TYPE_UINT: 154 case GLSL_TYPE_INT: 155 case GLSL_TYPE_FLOAT: 156 case GLSL_TYPE_BOOL: 157 return type->components(); 158 case GLSL_TYPE_ARRAY: 159 return type_size(type->fields.array) * type->length; 160 case GLSL_TYPE_STRUCT: 161 size = 0; 162 for (i = 0; i < type->length; i++) { 163 size += type_size(type->fields.structure[i].type); 164 } 165 return size; 166 case GLSL_TYPE_SAMPLER: 167 /* Samplers take up no register space, since they're baked in at 168 * link time. 169 */ 170 return 0; 171 default: 172 assert(!"not reached"); 173 return 0; 174 } 175} 176 177int 178fs_visitor::virtual_grf_alloc(int size) 179{ 180 if (virtual_grf_array_size <= virtual_grf_next) { 181 if (virtual_grf_array_size == 0) 182 virtual_grf_array_size = 16; 183 else 184 virtual_grf_array_size *= 2; 185 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, 186 int, virtual_grf_array_size); 187 188 /* This slot is always unused. */ 189 virtual_grf_sizes[0] = 0; 190 } 191 virtual_grf_sizes[virtual_grf_next] = size; 192 return virtual_grf_next++; 193} 194 195/** Fixed HW reg constructor. */ 196fs_reg::fs_reg(enum register_file file, int hw_reg) 197{ 198 init(); 199 this->file = file; 200 this->hw_reg = hw_reg; 201 this->type = BRW_REGISTER_TYPE_F; 202} 203 204/** Fixed HW reg constructor. */ 205fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) 206{ 207 init(); 208 this->file = file; 209 this->hw_reg = hw_reg; 210 this->type = type; 211} 212 213int 214brw_type_for_base_type(const struct glsl_type *type) 215{ 216 switch (type->base_type) { 217 case GLSL_TYPE_FLOAT: 218 return BRW_REGISTER_TYPE_F; 219 case GLSL_TYPE_INT: 220 case GLSL_TYPE_BOOL: 221 return BRW_REGISTER_TYPE_D; 222 case GLSL_TYPE_UINT: 223 return BRW_REGISTER_TYPE_UD; 224 case GLSL_TYPE_ARRAY: 225 case GLSL_TYPE_STRUCT: 226 case GLSL_TYPE_SAMPLER: 227 /* These should be overridden with the type of the member when 228 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 229 * way to trip up if we don't. 230 */ 231 return BRW_REGISTER_TYPE_UD; 232 default: 233 assert(!"not reached"); 234 return BRW_REGISTER_TYPE_F; 235 } 236} 237 238/** Automatic reg constructor. */ 239fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 240{ 241 init(); 242 243 this->file = GRF; 244 this->reg = v->virtual_grf_alloc(type_size(type)); 245 this->reg_offset = 0; 246 this->type = brw_type_for_base_type(type); 247} 248 249fs_reg * 250fs_visitor::variable_storage(ir_variable *var) 251{ 252 return (fs_reg *)hash_table_find(this->variable_ht, var); 253} 254 255/* Our support for uniforms is piggy-backed on the struct 256 * gl_fragment_program, because that's where the values actually 257 * get stored, rather than in some global gl_shader_program uniform 258 * store. 259 */ 260int 261fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 262{ 263 unsigned int offset = 0; 264 float *vec_values; 265 266 if (type->is_matrix()) { 267 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 268 type->vector_elements, 269 1); 270 271 for (unsigned int i = 0; i < type->matrix_columns; i++) { 272 offset += setup_uniform_values(loc + offset, column); 273 } 274 275 return offset; 276 } 277 278 switch (type->base_type) { 279 case GLSL_TYPE_FLOAT: 280 case GLSL_TYPE_UINT: 281 case GLSL_TYPE_INT: 282 case GLSL_TYPE_BOOL: 283 vec_values = fp->Base.Parameters->ParameterValues[loc]; 284 for (unsigned int i = 0; i < type->vector_elements; i++) { 285 unsigned int param = c->prog_data.nr_params++; 286 287 assert(param < ARRAY_SIZE(c->prog_data.param)); 288 289 switch (type->base_type) { 290 case GLSL_TYPE_FLOAT: 291 c->prog_data.param_convert[param] = PARAM_NO_CONVERT; 292 break; 293 case GLSL_TYPE_UINT: 294 c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; 295 break; 296 case GLSL_TYPE_INT: 297 c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; 298 break; 299 case GLSL_TYPE_BOOL: 300 c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; 301 break; 302 } 303 304 c->prog_data.param[param] = &vec_values[i]; 305 } 306 return 1; 307 308 case GLSL_TYPE_STRUCT: 309 for (unsigned int i = 0; i < type->length; i++) { 310 offset += setup_uniform_values(loc + offset, 311 type->fields.structure[i].type); 312 } 313 return offset; 314 315 case GLSL_TYPE_ARRAY: 316 for (unsigned int i = 0; i < type->length; i++) { 317 offset += setup_uniform_values(loc + offset, type->fields.array); 318 } 319 return offset; 320 321 case GLSL_TYPE_SAMPLER: 322 /* The sampler takes up a slot, but we don't use any values from it. */ 323 return 1; 324 325 default: 326 assert(!"not reached"); 327 return 0; 328 } 329} 330 331 332/* Our support for builtin uniforms is even scarier than non-builtin. 333 * It sits on top of the PROG_STATE_VAR parameters that are 334 * automatically updated from GL context state. 335 */ 336void 337fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 338{ 339 const struct gl_builtin_uniform_desc *statevar = NULL; 340 341 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 342 statevar = &_mesa_builtin_uniform_desc[i]; 343 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 344 break; 345 } 346 347 if (!statevar->name) { 348 this->fail = true; 349 printf("Failed to find builtin uniform `%s'\n", ir->name); 350 return; 351 } 352 353 int array_count; 354 if (ir->type->is_array()) { 355 array_count = ir->type->length; 356 } else { 357 array_count = 1; 358 } 359 360 for (int a = 0; a < array_count; a++) { 361 for (unsigned int i = 0; i < statevar->num_elements; i++) { 362 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 363 int tokens[STATE_LENGTH]; 364 365 memcpy(tokens, element->tokens, sizeof(element->tokens)); 366 if (ir->type->is_array()) { 367 tokens[1] = a; 368 } 369 370 /* This state reference has already been setup by ir_to_mesa, 371 * but we'll get the same index back here. 372 */ 373 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 374 (gl_state_index *)tokens); 375 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 376 377 /* Add each of the unique swizzles of the element as a 378 * parameter. This'll end up matching the expected layout of 379 * the array/matrix/structure we're trying to fill in. 380 */ 381 int last_swiz = -1; 382 for (unsigned int i = 0; i < 4; i++) { 383 int swiz = GET_SWZ(element->swizzle, i); 384 if (swiz == last_swiz) 385 break; 386 last_swiz = swiz; 387 388 c->prog_data.param_convert[c->prog_data.nr_params] = 389 PARAM_NO_CONVERT; 390 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; 391 } 392 } 393 } 394} 395 396fs_reg * 397fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 398{ 399 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 400 fs_reg wpos = *reg; 401 fs_reg neg_y = this->pixel_y; 402 neg_y.negate = true; 403 bool flip = !ir->origin_upper_left ^ c->key.render_to_fbo; 404 405 /* gl_FragCoord.x */ 406 if (ir->pixel_center_integer) { 407 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 408 } else { 409 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 410 } 411 wpos.reg_offset++; 412 413 /* gl_FragCoord.y */ 414 if (!flip && ir->pixel_center_integer) { 415 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 416 } else { 417 fs_reg pixel_y = this->pixel_y; 418 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 419 420 if (flip) { 421 pixel_y.negate = true; 422 offset += c->key.drawable_height - 1.0; 423 } 424 425 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 426 } 427 wpos.reg_offset++; 428 429 /* gl_FragCoord.z */ 430 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 431 interp_reg(FRAG_ATTRIB_WPOS, 2))); 432 wpos.reg_offset++; 433 434 /* gl_FragCoord.w: Already set up in emit_interpolation */ 435 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 436 437 return reg; 438} 439 440fs_reg * 441fs_visitor::emit_general_interpolation(ir_variable *ir) 442{ 443 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 444 /* Interpolation is always in floating point regs. */ 445 reg->type = BRW_REGISTER_TYPE_F; 446 fs_reg attr = *reg; 447 448 unsigned int array_elements; 449 const glsl_type *type; 450 451 if (ir->type->is_array()) { 452 array_elements = ir->type->length; 453 if (array_elements == 0) { 454 this->fail = true; 455 } 456 type = ir->type->fields.array; 457 } else { 458 array_elements = 1; 459 type = ir->type; 460 } 461 462 int location = ir->location; 463 for (unsigned int i = 0; i < array_elements; i++) { 464 for (unsigned int j = 0; j < type->matrix_columns; j++) { 465 if (urb_setup[location] == -1) { 466 /* If there's no incoming setup data for this slot, don't 467 * emit interpolation for it. 468 */ 469 attr.reg_offset += type->vector_elements; 470 location++; 471 continue; 472 } 473 474 for (unsigned int c = 0; c < type->vector_elements; c++) { 475 struct brw_reg interp = interp_reg(location, c); 476 emit(fs_inst(FS_OPCODE_LINTERP, 477 attr, 478 this->delta_x, 479 this->delta_y, 480 fs_reg(interp))); 481 attr.reg_offset++; 482 } 483 484 if (intel->gen < 6) { 485 attr.reg_offset -= type->vector_elements; 486 for (unsigned int c = 0; c < type->vector_elements; c++) { 487 emit(fs_inst(BRW_OPCODE_MUL, 488 attr, 489 attr, 490 this->pixel_w)); 491 attr.reg_offset++; 492 } 493 } 494 location++; 495 } 496 } 497 498 return reg; 499} 500 501fs_reg * 502fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) 503{ 504 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 505 506 /* The frontfacing comes in as a bit in the thread payload. */ 507 if (intel->gen >= 6) { 508 emit(fs_inst(BRW_OPCODE_ASR, 509 *reg, 510 fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), 511 fs_reg(15))); 512 emit(fs_inst(BRW_OPCODE_NOT, 513 *reg, 514 *reg)); 515 emit(fs_inst(BRW_OPCODE_AND, 516 *reg, 517 *reg, 518 fs_reg(1))); 519 } else { 520 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 521 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 522 * us front face 523 */ 524 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 525 *reg, 526 fs_reg(r1_6ud), 527 fs_reg(1u << 31))); 528 inst->conditional_mod = BRW_CONDITIONAL_L; 529 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 530 } 531 532 return reg; 533} 534 535fs_inst * 536fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) 537{ 538 switch (opcode) { 539 case FS_OPCODE_RCP: 540 case FS_OPCODE_RSQ: 541 case FS_OPCODE_SQRT: 542 case FS_OPCODE_EXP2: 543 case FS_OPCODE_LOG2: 544 case FS_OPCODE_SIN: 545 case FS_OPCODE_COS: 546 break; 547 default: 548 assert(!"not reached: bad math opcode"); 549 return NULL; 550 } 551 552 /* Can't do hstride == 0 args to gen6 math, so expand it out. We 553 * might be able to do better by doing execsize = 1 math and then 554 * expanding that result out, but we would need to be careful with 555 * masking. 556 */ 557 if (intel->gen >= 6 && src.file == UNIFORM) { 558 fs_reg expanded = fs_reg(this, glsl_type::float_type); 559 emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); 560 src = expanded; 561 } 562 563 fs_inst *inst = emit(fs_inst(opcode, dst, src)); 564 565 if (intel->gen < 6) { 566 inst->base_mrf = 2; 567 inst->mlen = 1; 568 } 569 570 return inst; 571} 572 573fs_inst * 574fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) 575{ 576 int base_mrf = 2; 577 fs_inst *inst; 578 579 assert(opcode == FS_OPCODE_POW); 580 581 if (intel->gen >= 6) { 582 /* Can't do hstride == 0 args to gen6 math, so expand it out. */ 583 if (src0.file == UNIFORM) { 584 fs_reg expanded = fs_reg(this, glsl_type::float_type); 585 emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); 586 src0 = expanded; 587 } 588 589 if (src1.file == UNIFORM) { 590 fs_reg expanded = fs_reg(this, glsl_type::float_type); 591 emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); 592 src1 = expanded; 593 } 594 595 inst = emit(fs_inst(opcode, dst, src0, src1)); 596 } else { 597 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1)); 598 inst = emit(fs_inst(opcode, dst, src0, reg_null_f)); 599 600 inst->base_mrf = base_mrf; 601 inst->mlen = 2; 602 } 603 return inst; 604} 605 606void 607fs_visitor::visit(ir_variable *ir) 608{ 609 fs_reg *reg = NULL; 610 611 if (variable_storage(ir)) 612 return; 613 614 if (strcmp(ir->name, "gl_FragColor") == 0) { 615 this->frag_color = ir; 616 } else if (strcmp(ir->name, "gl_FragData") == 0) { 617 this->frag_data = ir; 618 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 619 this->frag_depth = ir; 620 } 621 622 if (ir->mode == ir_var_in) { 623 if (!strcmp(ir->name, "gl_FragCoord")) { 624 reg = emit_fragcoord_interpolation(ir); 625 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 626 reg = emit_frontfacing_interpolation(ir); 627 } else { 628 reg = emit_general_interpolation(ir); 629 } 630 assert(reg); 631 hash_table_insert(this->variable_ht, reg, ir); 632 return; 633 } 634 635 if (ir->mode == ir_var_uniform) { 636 int param_index = c->prog_data.nr_params; 637 638 if (!strncmp(ir->name, "gl_", 3)) { 639 setup_builtin_uniform_values(ir); 640 } else { 641 setup_uniform_values(ir->location, ir->type); 642 } 643 644 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 645 reg->type = brw_type_for_base_type(ir->type); 646 } 647 648 if (!reg) 649 reg = new(this->mem_ctx) fs_reg(this, ir->type); 650 651 hash_table_insert(this->variable_ht, reg, ir); 652} 653 654void 655fs_visitor::visit(ir_dereference_variable *ir) 656{ 657 fs_reg *reg = variable_storage(ir->var); 658 this->result = *reg; 659} 660 661void 662fs_visitor::visit(ir_dereference_record *ir) 663{ 664 const glsl_type *struct_type = ir->record->type; 665 666 ir->record->accept(this); 667 668 unsigned int offset = 0; 669 for (unsigned int i = 0; i < struct_type->length; i++) { 670 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 671 break; 672 offset += type_size(struct_type->fields.structure[i].type); 673 } 674 this->result.reg_offset += offset; 675 this->result.type = brw_type_for_base_type(ir->type); 676} 677 678void 679fs_visitor::visit(ir_dereference_array *ir) 680{ 681 ir_constant *index; 682 int element_size; 683 684 ir->array->accept(this); 685 index = ir->array_index->as_constant(); 686 687 element_size = type_size(ir->type); 688 this->result.type = brw_type_for_base_type(ir->type); 689 690 if (index) { 691 assert(this->result.file == UNIFORM || 692 (this->result.file == GRF && 693 this->result.reg != 0)); 694 this->result.reg_offset += index->value.i[0] * element_size; 695 } else { 696 assert(!"FINISHME: non-constant array element"); 697 } 698} 699 700void 701fs_visitor::visit(ir_expression *ir) 702{ 703 unsigned int operand; 704 fs_reg op[2], temp; 705 fs_inst *inst; 706 707 for (operand = 0; operand < ir->get_num_operands(); operand++) { 708 ir->operands[operand]->accept(this); 709 if (this->result.file == BAD_FILE) { 710 ir_print_visitor v; 711 printf("Failed to get tree for expression operand:\n"); 712 ir->operands[operand]->accept(&v); 713 this->fail = true; 714 } 715 op[operand] = this->result; 716 717 /* Matrix expression operands should have been broken down to vector 718 * operations already. 719 */ 720 assert(!ir->operands[operand]->type->is_matrix()); 721 /* And then those vector operands should have been broken down to scalar. 722 */ 723 assert(!ir->operands[operand]->type->is_vector()); 724 } 725 726 /* Storage for our result. If our result goes into an assignment, it will 727 * just get copy-propagated out, so no worries. 728 */ 729 this->result = fs_reg(this, ir->type); 730 731 switch (ir->operation) { 732 case ir_unop_logic_not: 733 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 734 * ones complement of the whole register, not just bit 0. 735 */ 736 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1))); 737 break; 738 case ir_unop_neg: 739 op[0].negate = !op[0].negate; 740 this->result = op[0]; 741 break; 742 case ir_unop_abs: 743 op[0].abs = true; 744 this->result = op[0]; 745 break; 746 case ir_unop_sign: 747 temp = fs_reg(this, ir->type); 748 749 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 750 751 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 752 inst->conditional_mod = BRW_CONDITIONAL_G; 753 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 754 inst->predicated = true; 755 756 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 757 inst->conditional_mod = BRW_CONDITIONAL_L; 758 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 759 inst->predicated = true; 760 761 break; 762 case ir_unop_rcp: 763 emit_math(FS_OPCODE_RCP, this->result, op[0]); 764 break; 765 766 case ir_unop_exp2: 767 emit_math(FS_OPCODE_EXP2, this->result, op[0]); 768 break; 769 case ir_unop_log2: 770 emit_math(FS_OPCODE_LOG2, this->result, op[0]); 771 break; 772 case ir_unop_exp: 773 case ir_unop_log: 774 assert(!"not reached: should be handled by ir_explog_to_explog2"); 775 break; 776 case ir_unop_sin: 777 emit_math(FS_OPCODE_SIN, this->result, op[0]); 778 break; 779 case ir_unop_cos: 780 emit_math(FS_OPCODE_COS, this->result, op[0]); 781 break; 782 783 case ir_unop_dFdx: 784 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 785 break; 786 case ir_unop_dFdy: 787 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 788 break; 789 790 case ir_binop_add: 791 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 792 break; 793 case ir_binop_sub: 794 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 795 break; 796 797 case ir_binop_mul: 798 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 799 break; 800 case ir_binop_div: 801 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 802 break; 803 case ir_binop_mod: 804 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 805 break; 806 807 case ir_binop_less: 808 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 809 inst->conditional_mod = BRW_CONDITIONAL_L; 810 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 811 break; 812 case ir_binop_greater: 813 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 814 inst->conditional_mod = BRW_CONDITIONAL_G; 815 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 816 break; 817 case ir_binop_lequal: 818 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 819 inst->conditional_mod = BRW_CONDITIONAL_LE; 820 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 821 break; 822 case ir_binop_gequal: 823 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 824 inst->conditional_mod = BRW_CONDITIONAL_GE; 825 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 826 break; 827 case ir_binop_equal: 828 case ir_binop_all_equal: /* same as nequal for scalars */ 829 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 830 inst->conditional_mod = BRW_CONDITIONAL_Z; 831 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 832 break; 833 case ir_binop_nequal: 834 case ir_binop_any_nequal: /* same as nequal for scalars */ 835 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 836 inst->conditional_mod = BRW_CONDITIONAL_NZ; 837 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 838 break; 839 840 case ir_binop_logic_xor: 841 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 842 break; 843 844 case ir_binop_logic_or: 845 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 846 break; 847 848 case ir_binop_logic_and: 849 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 850 break; 851 852 case ir_binop_dot: 853 case ir_unop_any: 854 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 855 break; 856 857 case ir_unop_noise: 858 assert(!"not reached: should be handled by lower_noise"); 859 break; 860 861 case ir_unop_sqrt: 862 emit_math(FS_OPCODE_SQRT, this->result, op[0]); 863 break; 864 865 case ir_unop_rsq: 866 emit_math(FS_OPCODE_RSQ, this->result, op[0]); 867 break; 868 869 case ir_unop_i2f: 870 case ir_unop_b2f: 871 case ir_unop_b2i: 872 case ir_unop_f2i: 873 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 874 break; 875 case ir_unop_f2b: 876 case ir_unop_i2b: 877 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 878 inst->conditional_mod = BRW_CONDITIONAL_NZ; 879 inst = emit(fs_inst(BRW_OPCODE_AND, this->result, 880 this->result, fs_reg(1))); 881 break; 882 883 case ir_unop_trunc: 884 emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0])); 885 break; 886 case ir_unop_ceil: 887 op[0].negate = !op[0].negate; 888 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 889 this->result.negate = true; 890 break; 891 case ir_unop_floor: 892 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 893 break; 894 case ir_unop_fract: 895 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 896 break; 897 case ir_unop_round_even: 898 emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0])); 899 break; 900 901 case ir_binop_min: 902 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 903 inst->conditional_mod = BRW_CONDITIONAL_L; 904 905 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 906 inst->predicated = true; 907 break; 908 case ir_binop_max: 909 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 910 inst->conditional_mod = BRW_CONDITIONAL_G; 911 912 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 913 inst->predicated = true; 914 break; 915 916 case ir_binop_pow: 917 emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); 918 break; 919 920 case ir_unop_bit_not: 921 inst = emit(fs_inst(BRW_OPCODE_NOT, this->result, op[0])); 922 break; 923 case ir_binop_bit_and: 924 inst = emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 925 break; 926 case ir_binop_bit_xor: 927 inst = emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 928 break; 929 case ir_binop_bit_or: 930 inst = emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 931 break; 932 933 case ir_unop_u2f: 934 case ir_binop_lshift: 935 case ir_binop_rshift: 936 assert(!"GLSL 1.30 features unsupported"); 937 break; 938 } 939} 940 941void 942fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 943 const glsl_type *type, bool predicated) 944{ 945 switch (type->base_type) { 946 case GLSL_TYPE_FLOAT: 947 case GLSL_TYPE_UINT: 948 case GLSL_TYPE_INT: 949 case GLSL_TYPE_BOOL: 950 for (unsigned int i = 0; i < type->components(); i++) { 951 l.type = brw_type_for_base_type(type); 952 r.type = brw_type_for_base_type(type); 953 954 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 955 inst->predicated = predicated; 956 957 l.reg_offset++; 958 r.reg_offset++; 959 } 960 break; 961 case GLSL_TYPE_ARRAY: 962 for (unsigned int i = 0; i < type->length; i++) { 963 emit_assignment_writes(l, r, type->fields.array, predicated); 964 } 965 break; 966 967 case GLSL_TYPE_STRUCT: 968 for (unsigned int i = 0; i < type->length; i++) { 969 emit_assignment_writes(l, r, type->fields.structure[i].type, 970 predicated); 971 } 972 break; 973 974 case GLSL_TYPE_SAMPLER: 975 break; 976 977 default: 978 assert(!"not reached"); 979 break; 980 } 981} 982 983void 984fs_visitor::visit(ir_assignment *ir) 985{ 986 struct fs_reg l, r; 987 fs_inst *inst; 988 989 /* FINISHME: arrays on the lhs */ 990 ir->lhs->accept(this); 991 l = this->result; 992 993 ir->rhs->accept(this); 994 r = this->result; 995 996 assert(l.file != BAD_FILE); 997 assert(r.file != BAD_FILE); 998 999 if (ir->condition) { 1000 emit_bool_to_cond_code(ir->condition); 1001 } 1002 1003 if (ir->lhs->type->is_scalar() || 1004 ir->lhs->type->is_vector()) { 1005 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 1006 if (ir->write_mask & (1 << i)) { 1007 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1008 if (ir->condition) 1009 inst->predicated = true; 1010 r.reg_offset++; 1011 } 1012 l.reg_offset++; 1013 } 1014 } else { 1015 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 1016 } 1017} 1018 1019fs_inst * 1020fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1021{ 1022 int mlen; 1023 int base_mrf = 1; 1024 bool simd16 = false; 1025 fs_reg orig_dst; 1026 1027 /* g0 header. */ 1028 mlen = 1; 1029 1030 if (ir->shadow_comparitor) { 1031 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1032 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1033 coordinate)); 1034 coordinate.reg_offset++; 1035 } 1036 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1037 mlen += 3; 1038 1039 if (ir->op == ir_tex) { 1040 /* There's no plain shadow compare message, so we use shadow 1041 * compare with a bias of 0.0. 1042 */ 1043 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1044 fs_reg(0.0f))); 1045 mlen++; 1046 } else if (ir->op == ir_txb) { 1047 ir->lod_info.bias->accept(this); 1048 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1049 this->result)); 1050 mlen++; 1051 } else { 1052 assert(ir->op == ir_txl); 1053 ir->lod_info.lod->accept(this); 1054 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1055 this->result)); 1056 mlen++; 1057 } 1058 1059 ir->shadow_comparitor->accept(this); 1060 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1061 mlen++; 1062 } else if (ir->op == ir_tex) { 1063 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1064 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1065 coordinate)); 1066 coordinate.reg_offset++; 1067 } 1068 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1069 mlen += 3; 1070 } else { 1071 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 1072 * instructions. We'll need to do SIMD16 here. 1073 */ 1074 assert(ir->op == ir_txb || ir->op == ir_txl); 1075 1076 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1077 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), 1078 coordinate)); 1079 coordinate.reg_offset++; 1080 } 1081 1082 /* lod/bias appears after u/v/r. */ 1083 mlen += 6; 1084 1085 if (ir->op == ir_txb) { 1086 ir->lod_info.bias->accept(this); 1087 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1088 this->result)); 1089 mlen++; 1090 } else { 1091 ir->lod_info.lod->accept(this); 1092 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1093 this->result)); 1094 mlen++; 1095 } 1096 1097 /* The unused upper half. */ 1098 mlen++; 1099 1100 /* Now, since we're doing simd16, the return is 2 interleaved 1101 * vec4s where the odd-indexed ones are junk. We'll need to move 1102 * this weirdness around to the expected layout. 1103 */ 1104 simd16 = true; 1105 orig_dst = dst; 1106 dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 1107 2)); 1108 dst.type = BRW_REGISTER_TYPE_F; 1109 } 1110 1111 fs_inst *inst = NULL; 1112 switch (ir->op) { 1113 case ir_tex: 1114 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1115 break; 1116 case ir_txb: 1117 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1118 break; 1119 case ir_txl: 1120 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1121 break; 1122 case ir_txd: 1123 case ir_txf: 1124 assert(!"GLSL 1.30 features unsupported"); 1125 break; 1126 } 1127 inst->base_mrf = base_mrf; 1128 inst->mlen = mlen; 1129 1130 if (simd16) { 1131 for (int i = 0; i < 4; i++) { 1132 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst)); 1133 orig_dst.reg_offset++; 1134 dst.reg_offset += 2; 1135 } 1136 } 1137 1138 return inst; 1139} 1140 1141fs_inst * 1142fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1143{ 1144 /* gen5's SIMD8 sampler has slots for u, v, r, array index, then 1145 * optional parameters like shadow comparitor or LOD bias. If 1146 * optional parameters aren't present, those base slots are 1147 * optional and don't need to be included in the message. 1148 * 1149 * We don't fill in the unnecessary slots regardless, which may 1150 * look surprising in the disassembly. 1151 */ 1152 int mlen = 1; /* g0 header always present. */ 1153 int base_mrf = 1; 1154 1155 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1156 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1157 coordinate)); 1158 coordinate.reg_offset++; 1159 } 1160 mlen += ir->coordinate->type->vector_elements; 1161 1162 if (ir->shadow_comparitor) { 1163 mlen = MAX2(mlen, 5); 1164 1165 ir->shadow_comparitor->accept(this); 1166 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1167 mlen++; 1168 } 1169 1170 fs_inst *inst = NULL; 1171 switch (ir->op) { 1172 case ir_tex: 1173 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1174 break; 1175 case ir_txb: 1176 ir->lod_info.bias->accept(this); 1177 mlen = MAX2(mlen, 5); 1178 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1179 mlen++; 1180 1181 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1182 break; 1183 case ir_txl: 1184 ir->lod_info.lod->accept(this); 1185 mlen = MAX2(mlen, 5); 1186 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1187 mlen++; 1188 1189 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1190 break; 1191 case ir_txd: 1192 case ir_txf: 1193 assert(!"GLSL 1.30 features unsupported"); 1194 break; 1195 } 1196 inst->base_mrf = base_mrf; 1197 inst->mlen = mlen; 1198 1199 return inst; 1200} 1201 1202void 1203fs_visitor::visit(ir_texture *ir) 1204{ 1205 int sampler; 1206 fs_inst *inst = NULL; 1207 1208 ir->coordinate->accept(this); 1209 fs_reg coordinate = this->result; 1210 1211 /* Should be lowered by do_lower_texture_projection */ 1212 assert(!ir->projector); 1213 1214 sampler = _mesa_get_sampler_uniform_value(ir->sampler, 1215 ctx->Shader.CurrentFragmentProgram, 1216 &brw->fragment_program->Base); 1217 sampler = c->fp->program.Base.SamplerUnits[sampler]; 1218 1219 /* The 965 requires the EU to do the normalization of GL rectangle 1220 * texture coordinates. We use the program parameter state 1221 * tracking to get the scaling factor. 1222 */ 1223 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1224 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1225 int tokens[STATE_LENGTH] = { 1226 STATE_INTERNAL, 1227 STATE_TEXRECT_SCALE, 1228 sampler, 1229 0, 1230 0 1231 }; 1232 1233 c->prog_data.param_convert[c->prog_data.nr_params] = 1234 PARAM_NO_CONVERT; 1235 c->prog_data.param_convert[c->prog_data.nr_params + 1] = 1236 PARAM_NO_CONVERT; 1237 1238 fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1239 fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1240 GLuint index = _mesa_add_state_reference(params, 1241 (gl_state_index *)tokens); 1242 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 1243 1244 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0]; 1245 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1]; 1246 1247 fs_reg dst = fs_reg(this, ir->coordinate->type); 1248 fs_reg src = coordinate; 1249 coordinate = dst; 1250 1251 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x)); 1252 dst.reg_offset++; 1253 src.reg_offset++; 1254 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y)); 1255 } 1256 1257 /* Writemasking doesn't eliminate channels on SIMD8 texture 1258 * samples, so don't worry about them. 1259 */ 1260 fs_reg dst = fs_reg(this, glsl_type::vec4_type); 1261 1262 if (intel->gen < 5) { 1263 inst = emit_texture_gen4(ir, dst, coordinate); 1264 } else { 1265 inst = emit_texture_gen5(ir, dst, coordinate); 1266 } 1267 1268 inst->sampler = sampler; 1269 1270 this->result = dst; 1271 1272 if (ir->shadow_comparitor) 1273 inst->shadow_compare = true; 1274 1275 if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { 1276 fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); 1277 1278 for (int i = 0; i < 4; i++) { 1279 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1280 fs_reg l = swizzle_dst; 1281 l.reg_offset += i; 1282 1283 if (swiz == SWIZZLE_ZERO) { 1284 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f))); 1285 } else if (swiz == SWIZZLE_ONE) { 1286 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f))); 1287 } else { 1288 fs_reg r = dst; 1289 r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1290 emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1291 } 1292 } 1293 this->result = swizzle_dst; 1294 } 1295} 1296 1297void 1298fs_visitor::visit(ir_swizzle *ir) 1299{ 1300 ir->val->accept(this); 1301 fs_reg val = this->result; 1302 1303 if (ir->type->vector_elements == 1) { 1304 this->result.reg_offset += ir->mask.x; 1305 return; 1306 } 1307 1308 fs_reg result = fs_reg(this, ir->type); 1309 this->result = result; 1310 1311 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1312 fs_reg channel = val; 1313 int swiz = 0; 1314 1315 switch (i) { 1316 case 0: 1317 swiz = ir->mask.x; 1318 break; 1319 case 1: 1320 swiz = ir->mask.y; 1321 break; 1322 case 2: 1323 swiz = ir->mask.z; 1324 break; 1325 case 3: 1326 swiz = ir->mask.w; 1327 break; 1328 } 1329 1330 channel.reg_offset += swiz; 1331 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 1332 result.reg_offset++; 1333 } 1334} 1335 1336void 1337fs_visitor::visit(ir_discard *ir) 1338{ 1339 fs_reg temp = fs_reg(this, glsl_type::uint_type); 1340 1341 assert(ir->condition == NULL); /* FINISHME */ 1342 1343 emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null_d)); 1344 emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null_d, temp)); 1345 kill_emitted = true; 1346} 1347 1348void 1349fs_visitor::visit(ir_constant *ir) 1350{ 1351 fs_reg reg(this, ir->type); 1352 this->result = reg; 1353 1354 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1355 switch (ir->type->base_type) { 1356 case GLSL_TYPE_FLOAT: 1357 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); 1358 break; 1359 case GLSL_TYPE_UINT: 1360 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); 1361 break; 1362 case GLSL_TYPE_INT: 1363 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); 1364 break; 1365 case GLSL_TYPE_BOOL: 1366 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); 1367 break; 1368 default: 1369 assert(!"Non-float/uint/int/bool constant"); 1370 } 1371 reg.reg_offset++; 1372 } 1373} 1374 1375void 1376fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 1377{ 1378 ir_expression *expr = ir->as_expression(); 1379 1380 if (expr) { 1381 fs_reg op[2]; 1382 fs_inst *inst; 1383 1384 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1385 assert(expr->operands[i]->type->is_scalar()); 1386 1387 expr->operands[i]->accept(this); 1388 op[i] = this->result; 1389 } 1390 1391 switch (expr->operation) { 1392 case ir_unop_logic_not: 1393 inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1))); 1394 inst->conditional_mod = BRW_CONDITIONAL_Z; 1395 break; 1396 1397 case ir_binop_logic_xor: 1398 inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null_d, op[0], op[1])); 1399 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1400 break; 1401 1402 case ir_binop_logic_or: 1403 inst = emit(fs_inst(BRW_OPCODE_OR, reg_null_d, op[0], op[1])); 1404 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1405 break; 1406 1407 case ir_binop_logic_and: 1408 inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], op[1])); 1409 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1410 break; 1411 1412 case ir_unop_f2b: 1413 if (intel->gen >= 6) { 1414 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, 1415 op[0], fs_reg(0.0f))); 1416 } else { 1417 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); 1418 } 1419 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1420 break; 1421 1422 case ir_unop_i2b: 1423 if (intel->gen >= 6) { 1424 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0))); 1425 } else { 1426 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); 1427 } 1428 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1429 break; 1430 1431 case ir_binop_greater: 1432 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1433 inst->conditional_mod = BRW_CONDITIONAL_G; 1434 break; 1435 case ir_binop_gequal: 1436 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1437 inst->conditional_mod = BRW_CONDITIONAL_GE; 1438 break; 1439 case ir_binop_less: 1440 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1441 inst->conditional_mod = BRW_CONDITIONAL_L; 1442 break; 1443 case ir_binop_lequal: 1444 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1445 inst->conditional_mod = BRW_CONDITIONAL_LE; 1446 break; 1447 case ir_binop_equal: 1448 case ir_binop_all_equal: 1449 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1450 inst->conditional_mod = BRW_CONDITIONAL_Z; 1451 break; 1452 case ir_binop_nequal: 1453 case ir_binop_any_nequal: 1454 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1455 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1456 break; 1457 default: 1458 assert(!"not reached"); 1459 this->fail = true; 1460 break; 1461 } 1462 return; 1463 } 1464 1465 ir->accept(this); 1466 1467 if (intel->gen >= 6) { 1468 fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, 1469 this->result, fs_reg(1))); 1470 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1471 } else { 1472 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, this->result)); 1473 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1474 } 1475} 1476 1477/** 1478 * Emit a gen6 IF statement with the comparison folded into the IF 1479 * instruction. 1480 */ 1481void 1482fs_visitor::emit_if_gen6(ir_if *ir) 1483{ 1484 ir_expression *expr = ir->condition->as_expression(); 1485 1486 if (expr) { 1487 fs_reg op[2]; 1488 fs_inst *inst; 1489 fs_reg temp; 1490 1491 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1492 assert(expr->operands[i]->type->is_scalar()); 1493 1494 expr->operands[i]->accept(this); 1495 op[i] = this->result; 1496 } 1497 1498 switch (expr->operation) { 1499 case ir_unop_logic_not: 1500 inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1))); 1501 inst->conditional_mod = BRW_CONDITIONAL_Z; 1502 return; 1503 1504 case ir_binop_logic_xor: 1505 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1506 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1507 return; 1508 1509 case ir_binop_logic_or: 1510 temp = fs_reg(this, glsl_type::bool_type); 1511 emit(fs_inst(BRW_OPCODE_OR, temp, op[0], op[1])); 1512 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); 1513 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1514 return; 1515 1516 case ir_binop_logic_and: 1517 temp = fs_reg(this, glsl_type::bool_type); 1518 emit(fs_inst(BRW_OPCODE_AND, temp, op[0], op[1])); 1519 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); 1520 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1521 return; 1522 1523 case ir_unop_f2b: 1524 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0))); 1525 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1526 return; 1527 1528 case ir_unop_i2b: 1529 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); 1530 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1531 return; 1532 1533 case ir_binop_greater: 1534 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1535 inst->conditional_mod = BRW_CONDITIONAL_G; 1536 return; 1537 case ir_binop_gequal: 1538 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1539 inst->conditional_mod = BRW_CONDITIONAL_GE; 1540 return; 1541 case ir_binop_less: 1542 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1543 inst->conditional_mod = BRW_CONDITIONAL_L; 1544 return; 1545 case ir_binop_lequal: 1546 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1547 inst->conditional_mod = BRW_CONDITIONAL_LE; 1548 return; 1549 case ir_binop_equal: 1550 case ir_binop_all_equal: 1551 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1552 inst->conditional_mod = BRW_CONDITIONAL_Z; 1553 return; 1554 case ir_binop_nequal: 1555 case ir_binop_any_nequal: 1556 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1557 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1558 return; 1559 default: 1560 assert(!"not reached"); 1561 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); 1562 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1563 this->fail = true; 1564 return; 1565 } 1566 return; 1567 } 1568 1569 ir->condition->accept(this); 1570 1571 fs_inst *inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0))); 1572 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1573} 1574 1575void 1576fs_visitor::visit(ir_if *ir) 1577{ 1578 fs_inst *inst; 1579 1580 /* Don't point the annotation at the if statement, because then it plus 1581 * the then and else blocks get printed. 1582 */ 1583 this->base_ir = ir->condition; 1584 1585 if (intel->gen >= 6) { 1586 emit_if_gen6(ir); 1587 } else { 1588 emit_bool_to_cond_code(ir->condition); 1589 1590 inst = emit(fs_inst(BRW_OPCODE_IF)); 1591 inst->predicated = true; 1592 } 1593 1594 foreach_iter(exec_list_iterator, iter, ir->then_instructions) { 1595 ir_instruction *ir = (ir_instruction *)iter.get(); 1596 this->base_ir = ir; 1597 1598 ir->accept(this); 1599 } 1600 1601 if (!ir->else_instructions.is_empty()) { 1602 emit(fs_inst(BRW_OPCODE_ELSE)); 1603 1604 foreach_iter(exec_list_iterator, iter, ir->else_instructions) { 1605 ir_instruction *ir = (ir_instruction *)iter.get(); 1606 this->base_ir = ir; 1607 1608 ir->accept(this); 1609 } 1610 } 1611 1612 emit(fs_inst(BRW_OPCODE_ENDIF)); 1613} 1614 1615void 1616fs_visitor::visit(ir_loop *ir) 1617{ 1618 fs_reg counter = reg_undef; 1619 1620 if (ir->counter) { 1621 this->base_ir = ir->counter; 1622 ir->counter->accept(this); 1623 counter = *(variable_storage(ir->counter)); 1624 1625 if (ir->from) { 1626 this->base_ir = ir->from; 1627 ir->from->accept(this); 1628 1629 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result)); 1630 } 1631 } 1632 1633 emit(fs_inst(BRW_OPCODE_DO)); 1634 1635 if (ir->to) { 1636 this->base_ir = ir->to; 1637 ir->to->accept(this); 1638 1639 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, 1640 counter, this->result)); 1641 switch (ir->cmp) { 1642 case ir_binop_equal: 1643 inst->conditional_mod = BRW_CONDITIONAL_Z; 1644 break; 1645 case ir_binop_nequal: 1646 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1647 break; 1648 case ir_binop_gequal: 1649 inst->conditional_mod = BRW_CONDITIONAL_GE; 1650 break; 1651 case ir_binop_lequal: 1652 inst->conditional_mod = BRW_CONDITIONAL_LE; 1653 break; 1654 case ir_binop_greater: 1655 inst->conditional_mod = BRW_CONDITIONAL_G; 1656 break; 1657 case ir_binop_less: 1658 inst->conditional_mod = BRW_CONDITIONAL_L; 1659 break; 1660 default: 1661 assert(!"not reached: unknown loop condition"); 1662 this->fail = true; 1663 break; 1664 } 1665 1666 inst = emit(fs_inst(BRW_OPCODE_BREAK)); 1667 inst->predicated = true; 1668 } 1669 1670 foreach_iter(exec_list_iterator, iter, ir->body_instructions) { 1671 ir_instruction *ir = (ir_instruction *)iter.get(); 1672 1673 this->base_ir = ir; 1674 ir->accept(this); 1675 } 1676 1677 if (ir->increment) { 1678 this->base_ir = ir->increment; 1679 ir->increment->accept(this); 1680 emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result)); 1681 } 1682 1683 emit(fs_inst(BRW_OPCODE_WHILE)); 1684} 1685 1686void 1687fs_visitor::visit(ir_loop_jump *ir) 1688{ 1689 switch (ir->mode) { 1690 case ir_loop_jump::jump_break: 1691 emit(fs_inst(BRW_OPCODE_BREAK)); 1692 break; 1693 case ir_loop_jump::jump_continue: 1694 emit(fs_inst(BRW_OPCODE_CONTINUE)); 1695 break; 1696 } 1697} 1698 1699void 1700fs_visitor::visit(ir_call *ir) 1701{ 1702 assert(!"FINISHME"); 1703} 1704 1705void 1706fs_visitor::visit(ir_return *ir) 1707{ 1708 assert(!"FINISHME"); 1709} 1710 1711void 1712fs_visitor::visit(ir_function *ir) 1713{ 1714 /* Ignore function bodies other than main() -- we shouldn't see calls to 1715 * them since they should all be inlined before we get to ir_to_mesa. 1716 */ 1717 if (strcmp(ir->name, "main") == 0) { 1718 const ir_function_signature *sig; 1719 exec_list empty; 1720 1721 sig = ir->matching_signature(&empty); 1722 1723 assert(sig); 1724 1725 foreach_iter(exec_list_iterator, iter, sig->body) { 1726 ir_instruction *ir = (ir_instruction *)iter.get(); 1727 this->base_ir = ir; 1728 1729 ir->accept(this); 1730 } 1731 } 1732} 1733 1734void 1735fs_visitor::visit(ir_function_signature *ir) 1736{ 1737 assert(!"not reached"); 1738 (void)ir; 1739} 1740 1741fs_inst * 1742fs_visitor::emit(fs_inst inst) 1743{ 1744 fs_inst *list_inst = new(mem_ctx) fs_inst; 1745 *list_inst = inst; 1746 1747 list_inst->annotation = this->current_annotation; 1748 list_inst->ir = this->base_ir; 1749 1750 this->instructions.push_tail(list_inst); 1751 1752 return list_inst; 1753} 1754 1755/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1756void 1757fs_visitor::emit_dummy_fs() 1758{ 1759 /* Everyone's favorite color. */ 1760 emit(fs_inst(BRW_OPCODE_MOV, 1761 fs_reg(MRF, 2), 1762 fs_reg(1.0f))); 1763 emit(fs_inst(BRW_OPCODE_MOV, 1764 fs_reg(MRF, 3), 1765 fs_reg(0.0f))); 1766 emit(fs_inst(BRW_OPCODE_MOV, 1767 fs_reg(MRF, 4), 1768 fs_reg(1.0f))); 1769 emit(fs_inst(BRW_OPCODE_MOV, 1770 fs_reg(MRF, 5), 1771 fs_reg(0.0f))); 1772 1773 fs_inst *write; 1774 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1775 fs_reg(0), 1776 fs_reg(0))); 1777 write->base_mrf = 0; 1778} 1779 1780/* The register location here is relative to the start of the URB 1781 * data. It will get adjusted to be a real location before 1782 * generate_code() time. 1783 */ 1784struct brw_reg 1785fs_visitor::interp_reg(int location, int channel) 1786{ 1787 int regnr = urb_setup[location] * 2 + channel / 2; 1788 int stride = (channel & 1) * 4; 1789 1790 assert(urb_setup[location] != -1); 1791 1792 return brw_vec1_grf(regnr, stride); 1793} 1794 1795/** Emits the interpolation for the varying inputs. */ 1796void 1797fs_visitor::emit_interpolation_setup_gen4() 1798{ 1799 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1800 1801 this->current_annotation = "compute pixel centers"; 1802 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1803 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1804 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1805 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1806 emit(fs_inst(BRW_OPCODE_ADD, 1807 this->pixel_x, 1808 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1809 fs_reg(brw_imm_v(0x10101010)))); 1810 emit(fs_inst(BRW_OPCODE_ADD, 1811 this->pixel_y, 1812 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1813 fs_reg(brw_imm_v(0x11001100)))); 1814 1815 this->current_annotation = "compute pixel deltas from v0"; 1816 if (brw->has_pln) { 1817 this->delta_x = fs_reg(this, glsl_type::vec2_type); 1818 this->delta_y = this->delta_x; 1819 this->delta_y.reg_offset++; 1820 } else { 1821 this->delta_x = fs_reg(this, glsl_type::float_type); 1822 this->delta_y = fs_reg(this, glsl_type::float_type); 1823 } 1824 emit(fs_inst(BRW_OPCODE_ADD, 1825 this->delta_x, 1826 this->pixel_x, 1827 fs_reg(negate(brw_vec1_grf(1, 0))))); 1828 emit(fs_inst(BRW_OPCODE_ADD, 1829 this->delta_y, 1830 this->pixel_y, 1831 fs_reg(negate(brw_vec1_grf(1, 1))))); 1832 1833 this->current_annotation = "compute pos.w and 1/pos.w"; 1834 /* Compute wpos.w. It's always in our setup, since it's needed to 1835 * interpolate the other attributes. 1836 */ 1837 this->wpos_w = fs_reg(this, glsl_type::float_type); 1838 emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, 1839 interp_reg(FRAG_ATTRIB_WPOS, 3))); 1840 /* Compute the pixel 1/W value from wpos.w. */ 1841 this->pixel_w = fs_reg(this, glsl_type::float_type); 1842 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 1843 this->current_annotation = NULL; 1844} 1845 1846/** Emits the interpolation for the varying inputs. */ 1847void 1848fs_visitor::emit_interpolation_setup_gen6() 1849{ 1850 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1851 1852 /* If the pixel centers end up used, the setup is the same as for gen4. */ 1853 this->current_annotation = "compute pixel centers"; 1854 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); 1855 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); 1856 int_pixel_x.type = BRW_REGISTER_TYPE_UW; 1857 int_pixel_y.type = BRW_REGISTER_TYPE_UW; 1858 emit(fs_inst(BRW_OPCODE_ADD, 1859 int_pixel_x, 1860 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1861 fs_reg(brw_imm_v(0x10101010)))); 1862 emit(fs_inst(BRW_OPCODE_ADD, 1863 int_pixel_y, 1864 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1865 fs_reg(brw_imm_v(0x11001100)))); 1866 1867 /* As of gen6, we can no longer mix float and int sources. We have 1868 * to turn the integer pixel centers into floats for their actual 1869 * use. 1870 */ 1871 this->pixel_x = fs_reg(this, glsl_type::float_type); 1872 this->pixel_y = fs_reg(this, glsl_type::float_type); 1873 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x)); 1874 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); 1875 1876 this->current_annotation = "compute 1/pos.w"; 1877 this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); 1878 this->pixel_w = fs_reg(this, glsl_type::float_type); 1879 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 1880 1881 this->delta_x = fs_reg(brw_vec8_grf(2, 0)); 1882 this->delta_y = fs_reg(brw_vec8_grf(3, 0)); 1883 1884 this->current_annotation = NULL; 1885} 1886 1887void 1888fs_visitor::emit_fb_writes() 1889{ 1890 this->current_annotation = "FB write header"; 1891 GLboolean header_present = GL_TRUE; 1892 int nr = 0; 1893 1894 if (intel->gen >= 6 && 1895 !this->kill_emitted && 1896 c->key.nr_color_regions == 1) { 1897 header_present = false; 1898 } 1899 1900 if (header_present) { 1901 /* m0, m1 header */ 1902 nr += 2; 1903 } 1904 1905 if (c->key.aa_dest_stencil_reg) { 1906 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1907 fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); 1908 } 1909 1910 /* Reserve space for color. It'll be filled in per MRT below. */ 1911 int color_mrf = nr; 1912 nr += 4; 1913 1914 if (c->key.source_depth_to_render_target) { 1915 if (c->key.computes_depth) { 1916 /* Hand over gl_FragDepth. */ 1917 assert(this->frag_depth); 1918 fs_reg depth = *(variable_storage(this->frag_depth)); 1919 1920 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 1921 } else { 1922 /* Pass through the payload depth. */ 1923 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1924 fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); 1925 } 1926 } 1927 1928 if (c->key.dest_depth_reg) { 1929 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1930 fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); 1931 } 1932 1933 fs_reg color = reg_undef; 1934 if (this->frag_color) 1935 color = *(variable_storage(this->frag_color)); 1936 else if (this->frag_data) 1937 color = *(variable_storage(this->frag_data)); 1938 1939 for (int target = 0; target < c->key.nr_color_regions; target++) { 1940 this->current_annotation = talloc_asprintf(this->mem_ctx, 1941 "FB write target %d", 1942 target); 1943 if (this->frag_color || this->frag_data) { 1944 for (int i = 0; i < 4; i++) { 1945 emit(fs_inst(BRW_OPCODE_MOV, 1946 fs_reg(MRF, color_mrf + i), 1947 color)); 1948 color.reg_offset++; 1949 } 1950 } 1951 1952 if (this->frag_color) 1953 color.reg_offset -= 4; 1954 1955 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1956 reg_undef, reg_undef)); 1957 inst->target = target; 1958 inst->base_mrf = 0; 1959 inst->mlen = nr; 1960 if (target == c->key.nr_color_regions - 1) 1961 inst->eot = true; 1962 inst->header_present = header_present; 1963 } 1964 1965 if (c->key.nr_color_regions == 0) { 1966 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1967 reg_undef, reg_undef)); 1968 inst->base_mrf = 0; 1969 inst->mlen = nr; 1970 inst->eot = true; 1971 inst->header_present = header_present; 1972 } 1973 1974 this->current_annotation = NULL; 1975} 1976 1977void 1978fs_visitor::generate_fb_write(fs_inst *inst) 1979{ 1980 GLboolean eot = inst->eot; 1981 struct brw_reg implied_header; 1982 1983 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 1984 * move, here's g1. 1985 */ 1986 brw_push_insn_state(p); 1987 brw_set_mask_control(p, BRW_MASK_DISABLE); 1988 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1989 1990 if (inst->header_present) { 1991 if (intel->gen >= 6) { 1992 brw_MOV(p, 1993 brw_message_reg(inst->base_mrf), 1994 brw_vec8_grf(0, 0)); 1995 1996 if (inst->target > 0) { 1997 /* Set the render target index for choosing BLEND_STATE. */ 1998 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), 1999 BRW_REGISTER_TYPE_UD), 2000 brw_imm_ud(inst->target)); 2001 } 2002 2003 /* Clear viewport index, render target array index. */ 2004 brw_AND(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 0), 2005 BRW_REGISTER_TYPE_UD), 2006 retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), 2007 brw_imm_ud(0xf7ff)); 2008 2009 implied_header = brw_null_reg(); 2010 } else { 2011 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); 2012 } 2013 2014 brw_MOV(p, 2015 brw_message_reg(inst->base_mrf + 1), 2016 brw_vec8_grf(1, 0)); 2017 } else { 2018 implied_header = brw_null_reg(); 2019 } 2020 2021 brw_pop_insn_state(p); 2022 2023 brw_fb_WRITE(p, 2024 8, /* dispatch_width */ 2025 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 2026 inst->base_mrf, 2027 implied_header, 2028 inst->target, 2029 inst->mlen, 2030 0, 2031 eot); 2032} 2033 2034void 2035fs_visitor::generate_linterp(fs_inst *inst, 2036 struct brw_reg dst, struct brw_reg *src) 2037{ 2038 struct brw_reg delta_x = src[0]; 2039 struct brw_reg delta_y = src[1]; 2040 struct brw_reg interp = src[2]; 2041 2042 if (brw->has_pln && 2043 delta_y.nr == delta_x.nr + 1 && 2044 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { 2045 brw_PLN(p, dst, interp, delta_x); 2046 } else { 2047 brw_LINE(p, brw_null_reg(), interp, delta_x); 2048 brw_MAC(p, dst, suboffset(interp, 1), delta_y); 2049 } 2050} 2051 2052void 2053fs_visitor::generate_math(fs_inst *inst, 2054 struct brw_reg dst, struct brw_reg *src) 2055{ 2056 int op; 2057 2058 switch (inst->opcode) { 2059 case FS_OPCODE_RCP: 2060 op = BRW_MATH_FUNCTION_INV; 2061 break; 2062 case FS_OPCODE_RSQ: 2063 op = BRW_MATH_FUNCTION_RSQ; 2064 break; 2065 case FS_OPCODE_SQRT: 2066 op = BRW_MATH_FUNCTION_SQRT; 2067 break; 2068 case FS_OPCODE_EXP2: 2069 op = BRW_MATH_FUNCTION_EXP; 2070 break; 2071 case FS_OPCODE_LOG2: 2072 op = BRW_MATH_FUNCTION_LOG; 2073 break; 2074 case FS_OPCODE_POW: 2075 op = BRW_MATH_FUNCTION_POW; 2076 break; 2077 case FS_OPCODE_SIN: 2078 op = BRW_MATH_FUNCTION_SIN; 2079 break; 2080 case FS_OPCODE_COS: 2081 op = BRW_MATH_FUNCTION_COS; 2082 break; 2083 default: 2084 assert(!"not reached: unknown math function"); 2085 op = 0; 2086 break; 2087 } 2088 2089 if (intel->gen >= 6) { 2090 assert(inst->mlen == 0); 2091 2092 if (inst->opcode == FS_OPCODE_POW) { 2093 brw_math2(p, dst, op, src[0], src[1]); 2094 } else { 2095 brw_math(p, dst, 2096 op, 2097 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 2098 BRW_MATH_SATURATE_NONE, 2099 0, src[0], 2100 BRW_MATH_DATA_VECTOR, 2101 BRW_MATH_PRECISION_FULL); 2102 } 2103 } else { 2104 assert(inst->mlen >= 1); 2105 2106 brw_math(p, dst, 2107 op, 2108 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 2109 BRW_MATH_SATURATE_NONE, 2110 inst->base_mrf, src[0], 2111 BRW_MATH_DATA_VECTOR, 2112 BRW_MATH_PRECISION_FULL); 2113 } 2114} 2115 2116void 2117fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) 2118{ 2119 int msg_type = -1; 2120 int rlen = 4; 2121 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; 2122 2123 if (intel->gen >= 5) { 2124 switch (inst->opcode) { 2125 case FS_OPCODE_TEX: 2126 if (inst->shadow_compare) { 2127 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; 2128 } else { 2129 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; 2130 } 2131 break; 2132 case FS_OPCODE_TXB: 2133 if (inst->shadow_compare) { 2134 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; 2135 } else { 2136 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; 2137 } 2138 break; 2139 } 2140 } else { 2141 switch (inst->opcode) { 2142 case FS_OPCODE_TEX: 2143 /* Note that G45 and older determines shadow compare and dispatch width 2144 * from message length for most messages. 2145 */ 2146 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 2147 if (inst->shadow_compare) { 2148 assert(inst->mlen == 6); 2149 } else { 2150 assert(inst->mlen <= 4); 2151 } 2152 break; 2153 case FS_OPCODE_TXB: 2154 if (inst->shadow_compare) { 2155 assert(inst->mlen == 6); 2156 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 2157 } else { 2158 assert(inst->mlen == 9); 2159 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 2160 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; 2161 } 2162 break; 2163 } 2164 } 2165 assert(msg_type != -1); 2166 2167 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { 2168 rlen = 8; 2169 dst = vec16(dst); 2170 } 2171 2172 brw_SAMPLE(p, 2173 retype(dst, BRW_REGISTER_TYPE_UW), 2174 inst->base_mrf, 2175 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 2176 SURF_INDEX_TEXTURE(inst->sampler), 2177 inst->sampler, 2178 WRITEMASK_XYZW, 2179 msg_type, 2180 rlen, 2181 inst->mlen, 2182 0, 2183 1, 2184 simd_mode); 2185} 2186 2187 2188/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 2189 * looking like: 2190 * 2191 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 2192 * 2193 * and we're trying to produce: 2194 * 2195 * DDX DDY 2196 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 2197 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 2198 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 2199 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 2200 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 2201 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 2202 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 2203 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 2204 * 2205 * and add another set of two more subspans if in 16-pixel dispatch mode. 2206 * 2207 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 2208 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 2209 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 2210 * between each other. We could probably do it like ddx and swizzle the right 2211 * order later, but bail for now and just produce 2212 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) 2213 */ 2214void 2215fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2216{ 2217 struct brw_reg src0 = brw_reg(src.file, src.nr, 1, 2218 BRW_REGISTER_TYPE_F, 2219 BRW_VERTICAL_STRIDE_2, 2220 BRW_WIDTH_2, 2221 BRW_HORIZONTAL_STRIDE_0, 2222 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2223 struct brw_reg src1 = brw_reg(src.file, src.nr, 0, 2224 BRW_REGISTER_TYPE_F, 2225 BRW_VERTICAL_STRIDE_2, 2226 BRW_WIDTH_2, 2227 BRW_HORIZONTAL_STRIDE_0, 2228 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2229 brw_ADD(p, dst, src0, negate(src1)); 2230} 2231 2232void 2233fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2234{ 2235 struct brw_reg src0 = brw_reg(src.file, src.nr, 0, 2236 BRW_REGISTER_TYPE_F, 2237 BRW_VERTICAL_STRIDE_4, 2238 BRW_WIDTH_4, 2239 BRW_HORIZONTAL_STRIDE_0, 2240 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2241 struct brw_reg src1 = brw_reg(src.file, src.nr, 2, 2242 BRW_REGISTER_TYPE_F, 2243 BRW_VERTICAL_STRIDE_4, 2244 BRW_WIDTH_4, 2245 BRW_HORIZONTAL_STRIDE_0, 2246 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2247 brw_ADD(p, dst, src0, negate(src1)); 2248} 2249 2250void 2251fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask) 2252{ 2253 if (intel->gen >= 6) { 2254 /* Gen6 no longer has the mask reg for us to just read the 2255 * active channels from. However, cmp updates just the channels 2256 * of the flag reg that are enabled, so we can get at the 2257 * channel enables that way. In this step, make a reg of ones 2258 * we'll compare to. 2259 */ 2260 brw_MOV(p, mask, brw_imm_ud(1)); 2261 } else { 2262 brw_push_insn_state(p); 2263 brw_set_mask_control(p, BRW_MASK_DISABLE); 2264 brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */ 2265 brw_pop_insn_state(p); 2266 } 2267} 2268 2269void 2270fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask) 2271{ 2272 if (intel->gen >= 6) { 2273 struct brw_reg f0 = brw_flag_reg(); 2274 struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); 2275 2276 brw_push_insn_state(p); 2277 brw_set_mask_control(p, BRW_MASK_DISABLE); 2278 brw_MOV(p, f0, brw_imm_uw(0xffff)); /* inactive channels undiscarded */ 2279 brw_pop_insn_state(p); 2280 2281 brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 2282 BRW_CONDITIONAL_Z, mask, brw_imm_ud(0)); /* active channels fail test */ 2283 /* Undo CMP's whacking of predication*/ 2284 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2285 2286 brw_push_insn_state(p); 2287 brw_set_mask_control(p, BRW_MASK_DISABLE); 2288 brw_AND(p, g1, f0, g1); 2289 brw_pop_insn_state(p); 2290 } else { 2291 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); 2292 2293 mask = brw_uw1_reg(mask.file, mask.nr, 0); 2294 2295 brw_push_insn_state(p); 2296 brw_set_mask_control(p, BRW_MASK_DISABLE); 2297 brw_AND(p, g0, mask, g0); 2298 brw_pop_insn_state(p); 2299 } 2300} 2301 2302void 2303fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src) 2304{ 2305 assert(inst->mlen != 0); 2306 2307 brw_MOV(p, 2308 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD), 2309 retype(src, BRW_REGISTER_TYPE_UD)); 2310 brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1, 2311 inst->offset); 2312} 2313 2314void 2315fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst) 2316{ 2317 assert(inst->mlen != 0); 2318 2319 /* Clear any post destination dependencies that would be ignored by 2320 * the block read. See the B-Spec for pre-gen5 send instruction. 2321 * 2322 * This could use a better solution, since texture sampling and 2323 * math reads could potentially run into it as well -- anywhere 2324 * that we have a SEND with a destination that is a register that 2325 * was written but not read within the last N instructions (what's 2326 * N? unsure). This is rare because of dead code elimination, but 2327 * not impossible. 2328 */ 2329 if (intel->gen == 4 && !intel->is_g4x) 2330 brw_MOV(p, brw_null_reg(), dst); 2331 2332 brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1, 2333 inst->offset); 2334 2335 if (intel->gen == 4 && !intel->is_g4x) { 2336 /* gen4 errata: destination from a send can't be used as a 2337 * destination until it's been read. Just read it so we don't 2338 * have to worry. 2339 */ 2340 brw_MOV(p, brw_null_reg(), dst); 2341 } 2342} 2343 2344 2345void 2346fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) 2347{ 2348 assert(inst->mlen != 0); 2349 2350 /* Clear any post destination dependencies that would be ignored by 2351 * the block read. See the B-Spec for pre-gen5 send instruction. 2352 * 2353 * This could use a better solution, since texture sampling and 2354 * math reads could potentially run into it as well -- anywhere 2355 * that we have a SEND with a destination that is a register that 2356 * was written but not read within the last N instructions (what's 2357 * N? unsure). This is rare because of dead code elimination, but 2358 * not impossible. 2359 */ 2360 if (intel->gen == 4 && !intel->is_g4x) 2361 brw_MOV(p, brw_null_reg(), dst); 2362 2363 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), 2364 inst->offset, SURF_INDEX_FRAG_CONST_BUFFER); 2365 2366 if (intel->gen == 4 && !intel->is_g4x) { 2367 /* gen4 errata: destination from a send can't be used as a 2368 * destination until it's been read. Just read it so we don't 2369 * have to worry. 2370 */ 2371 brw_MOV(p, brw_null_reg(), dst); 2372 } 2373} 2374 2375void 2376fs_visitor::assign_curb_setup() 2377{ 2378 c->prog_data.first_curbe_grf = c->key.nr_payload_regs; 2379 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 2380 2381 /* Map the offsets in the UNIFORM file to fixed HW regs. */ 2382 foreach_iter(exec_list_iterator, iter, this->instructions) { 2383 fs_inst *inst = (fs_inst *)iter.get(); 2384 2385 for (unsigned int i = 0; i < 3; i++) { 2386 if (inst->src[i].file == UNIFORM) { 2387 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 2388 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + 2389 constant_nr / 8, 2390 constant_nr % 8); 2391 2392 inst->src[i].file = FIXED_HW_REG; 2393 inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type); 2394 } 2395 } 2396 } 2397} 2398 2399void 2400fs_visitor::calculate_urb_setup() 2401{ 2402 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2403 urb_setup[i] = -1; 2404 } 2405 2406 int urb_next = 0; 2407 /* Figure out where each of the incoming setup attributes lands. */ 2408 if (intel->gen >= 6) { 2409 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2410 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) { 2411 urb_setup[i] = urb_next++; 2412 } 2413 } 2414 } else { 2415 /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */ 2416 for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) { 2417 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { 2418 int fp_index; 2419 2420 if (i >= VERT_RESULT_VAR0) 2421 fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); 2422 else if (i <= VERT_RESULT_TEX7) 2423 fp_index = i; 2424 else 2425 fp_index = -1; 2426 2427 if (fp_index >= 0) 2428 urb_setup[fp_index] = urb_next++; 2429 } 2430 } 2431 } 2432 2433 /* Each attribute is 4 setup channels, each of which is half a reg. */ 2434 c->prog_data.urb_read_length = urb_next * 2; 2435} 2436 2437void 2438fs_visitor::assign_urb_setup() 2439{ 2440 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; 2441 2442 /* Offset all the urb_setup[] index by the actual position of the 2443 * setup regs, now that the location of the constants has been chosen. 2444 */ 2445 foreach_iter(exec_list_iterator, iter, this->instructions) { 2446 fs_inst *inst = (fs_inst *)iter.get(); 2447 2448 if (inst->opcode != FS_OPCODE_LINTERP) 2449 continue; 2450 2451 assert(inst->src[2].file == FIXED_HW_REG); 2452 2453 inst->src[2].fixed_hw_reg.nr += urb_start; 2454 } 2455 2456 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 2457} 2458 2459/** 2460 * Split large virtual GRFs into separate components if we can. 2461 * 2462 * This is mostly duplicated with what brw_fs_vector_splitting does, 2463 * but that's really conservative because it's afraid of doing 2464 * splitting that doesn't result in real progress after the rest of 2465 * the optimization phases, which would cause infinite looping in 2466 * optimization. We can do it once here, safely. This also has the 2467 * opportunity to split interpolated values, or maybe even uniforms, 2468 * which we don't have at the IR level. 2469 * 2470 * We want to split, because virtual GRFs are what we register 2471 * allocate and spill (due to contiguousness requirements for some 2472 * instructions), and they're what we naturally generate in the 2473 * codegen process, but most virtual GRFs don't actually need to be 2474 * contiguous sets of GRFs. If we split, we'll end up with reduced 2475 * live intervals and better dead code elimination and coalescing. 2476 */ 2477void 2478fs_visitor::split_virtual_grfs() 2479{ 2480 int num_vars = this->virtual_grf_next; 2481 bool split_grf[num_vars]; 2482 int new_virtual_grf[num_vars]; 2483 2484 /* Try to split anything > 0 sized. */ 2485 for (int i = 0; i < num_vars; i++) { 2486 if (this->virtual_grf_sizes[i] != 1) 2487 split_grf[i] = true; 2488 else 2489 split_grf[i] = false; 2490 } 2491 2492 if (brw->has_pln) { 2493 /* PLN opcodes rely on the delta_xy being contiguous. */ 2494 split_grf[this->delta_x.reg] = false; 2495 } 2496 2497 foreach_iter(exec_list_iterator, iter, this->instructions) { 2498 fs_inst *inst = (fs_inst *)iter.get(); 2499 2500 /* Texturing produces 4 contiguous registers, so no splitting. */ 2501 if ((inst->opcode == FS_OPCODE_TEX || 2502 inst->opcode == FS_OPCODE_TXB || 2503 inst->opcode == FS_OPCODE_TXL) && 2504 inst->dst.file == GRF) { 2505 split_grf[inst->dst.reg] = false; 2506 } 2507 } 2508 2509 /* Allocate new space for split regs. Note that the virtual 2510 * numbers will be contiguous. 2511 */ 2512 for (int i = 0; i < num_vars; i++) { 2513 if (split_grf[i]) { 2514 new_virtual_grf[i] = virtual_grf_alloc(1); 2515 for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { 2516 int reg = virtual_grf_alloc(1); 2517 assert(reg == new_virtual_grf[i] + j - 1); 2518 (void) reg; 2519 } 2520 this->virtual_grf_sizes[i] = 1; 2521 } 2522 } 2523 2524 foreach_iter(exec_list_iterator, iter, this->instructions) { 2525 fs_inst *inst = (fs_inst *)iter.get(); 2526 2527 if (inst->dst.file == GRF && 2528 split_grf[inst->dst.reg] && 2529 inst->dst.reg_offset != 0) { 2530 inst->dst.reg = (new_virtual_grf[inst->dst.reg] + 2531 inst->dst.reg_offset - 1); 2532 inst->dst.reg_offset = 0; 2533 } 2534 for (int i = 0; i < 3; i++) { 2535 if (inst->src[i].file == GRF && 2536 split_grf[inst->src[i].reg] && 2537 inst->src[i].reg_offset != 0) { 2538 inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] + 2539 inst->src[i].reg_offset - 1); 2540 inst->src[i].reg_offset = 0; 2541 } 2542 } 2543 } 2544} 2545 2546/** 2547 * Choose accesses from the UNIFORM file to demote to using the pull 2548 * constant buffer. 2549 * 2550 * We allow a fragment shader to have more than the specified minimum 2551 * maximum number of fragment shader uniform components (64). If 2552 * there are too many of these, they'd fill up all of register space. 2553 * So, this will push some of them out to the pull constant buffer and 2554 * update the program to load them. 2555 */ 2556void 2557fs_visitor::setup_pull_constants() 2558{ 2559 /* Only allow 16 registers (128 uniform components) as push constants. */ 2560 unsigned int max_uniform_components = 16 * 8; 2561 if (c->prog_data.nr_params <= max_uniform_components) 2562 return; 2563 2564 /* Just demote the end of the list. We could probably do better 2565 * here, demoting things that are rarely used in the program first. 2566 */ 2567 int pull_uniform_base = max_uniform_components; 2568 int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; 2569 2570 foreach_iter(exec_list_iterator, iter, this->instructions) { 2571 fs_inst *inst = (fs_inst *)iter.get(); 2572 2573 for (int i = 0; i < 3; i++) { 2574 if (inst->src[i].file != UNIFORM) 2575 continue; 2576 2577 int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 2578 if (uniform_nr < pull_uniform_base) 2579 continue; 2580 2581 fs_reg dst = fs_reg(this, glsl_type::float_type); 2582 fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD, 2583 dst); 2584 pull->offset = ((uniform_nr - pull_uniform_base) * 4) & ~15; 2585 pull->ir = inst->ir; 2586 pull->annotation = inst->annotation; 2587 pull->base_mrf = 14; 2588 pull->mlen = 1; 2589 2590 inst->insert_before(pull); 2591 2592 inst->src[i].file = GRF; 2593 inst->src[i].reg = dst.reg; 2594 inst->src[i].reg_offset = 0; 2595 inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3; 2596 } 2597 } 2598 2599 for (int i = 0; i < pull_uniform_count; i++) { 2600 c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i]; 2601 c->prog_data.pull_param_convert[i] = 2602 c->prog_data.param_convert[pull_uniform_base + i]; 2603 } 2604 c->prog_data.nr_params -= pull_uniform_count; 2605 c->prog_data.nr_pull_params = pull_uniform_count; 2606} 2607 2608void 2609fs_visitor::calculate_live_intervals() 2610{ 2611 int num_vars = this->virtual_grf_next; 2612 int *def = talloc_array(mem_ctx, int, num_vars); 2613 int *use = talloc_array(mem_ctx, int, num_vars); 2614 int loop_depth = 0; 2615 int loop_start = 0; 2616 int bb_header_ip = 0; 2617 2618 for (int i = 0; i < num_vars; i++) { 2619 def[i] = 1 << 30; 2620 use[i] = -1; 2621 } 2622 2623 int ip = 0; 2624 foreach_iter(exec_list_iterator, iter, this->instructions) { 2625 fs_inst *inst = (fs_inst *)iter.get(); 2626 2627 if (inst->opcode == BRW_OPCODE_DO) { 2628 if (loop_depth++ == 0) 2629 loop_start = ip; 2630 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2631 loop_depth--; 2632 2633 if (loop_depth == 0) { 2634 /* Patches up the use of vars marked for being live across 2635 * the whole loop. 2636 */ 2637 for (int i = 0; i < num_vars; i++) { 2638 if (use[i] == loop_start) { 2639 use[i] = ip; 2640 } 2641 } 2642 } 2643 } else { 2644 for (unsigned int i = 0; i < 3; i++) { 2645 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2646 int reg = inst->src[i].reg; 2647 2648 if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && 2649 def[reg] >= bb_header_ip)) { 2650 use[reg] = ip; 2651 } else { 2652 def[reg] = MIN2(loop_start, def[reg]); 2653 use[reg] = loop_start; 2654 2655 /* Nobody else is going to go smash our start to 2656 * later in the loop now, because def[reg] now 2657 * points before the bb header. 2658 */ 2659 } 2660 } 2661 } 2662 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2663 int reg = inst->dst.reg; 2664 2665 if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && 2666 !inst->predicated)) { 2667 def[reg] = MIN2(def[reg], ip); 2668 } else { 2669 def[reg] = MIN2(def[reg], loop_start); 2670 } 2671 } 2672 } 2673 2674 ip++; 2675 2676 /* Set the basic block header IP. This is used for determining 2677 * if a complete def of single-register virtual GRF in a loop 2678 * dominates a use in the same basic block. It's a quick way to 2679 * reduce the live interval range of most register used in a 2680 * loop. 2681 */ 2682 if (inst->opcode == BRW_OPCODE_IF || 2683 inst->opcode == BRW_OPCODE_ELSE || 2684 inst->opcode == BRW_OPCODE_ENDIF || 2685 inst->opcode == BRW_OPCODE_DO || 2686 inst->opcode == BRW_OPCODE_WHILE || 2687 inst->opcode == BRW_OPCODE_BREAK || 2688 inst->opcode == BRW_OPCODE_CONTINUE) { 2689 bb_header_ip = ip; 2690 } 2691 } 2692 2693 talloc_free(this->virtual_grf_def); 2694 talloc_free(this->virtual_grf_use); 2695 this->virtual_grf_def = def; 2696 this->virtual_grf_use = use; 2697} 2698 2699/** 2700 * Attempts to move immediate constants into the immediate 2701 * constant slot of following instructions. 2702 * 2703 * Immediate constants are a bit tricky -- they have to be in the last 2704 * operand slot, you can't do abs/negate on them, 2705 */ 2706 2707bool 2708fs_visitor::propagate_constants() 2709{ 2710 bool progress = false; 2711 2712 foreach_iter(exec_list_iterator, iter, this->instructions) { 2713 fs_inst *inst = (fs_inst *)iter.get(); 2714 2715 if (inst->opcode != BRW_OPCODE_MOV || 2716 inst->predicated || 2717 inst->dst.file != GRF || inst->src[0].file != IMM || 2718 inst->dst.type != inst->src[0].type) 2719 continue; 2720 2721 /* Don't bother with cases where we should have had the 2722 * operation on the constant folded in GLSL already. 2723 */ 2724 if (inst->saturate) 2725 continue; 2726 2727 /* Found a move of a constant to a GRF. Find anything else using the GRF 2728 * before it's written, and replace it with the constant if we can. 2729 */ 2730 exec_list_iterator scan_iter = iter; 2731 scan_iter.next(); 2732 for (; scan_iter.has_next(); scan_iter.next()) { 2733 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2734 2735 if (scan_inst->opcode == BRW_OPCODE_DO || 2736 scan_inst->opcode == BRW_OPCODE_WHILE || 2737 scan_inst->opcode == BRW_OPCODE_ELSE || 2738 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2739 break; 2740 } 2741 2742 for (int i = 2; i >= 0; i--) { 2743 if (scan_inst->src[i].file != GRF || 2744 scan_inst->src[i].reg != inst->dst.reg || 2745 scan_inst->src[i].reg_offset != inst->dst.reg_offset) 2746 continue; 2747 2748 /* Don't bother with cases where we should have had the 2749 * operation on the constant folded in GLSL already. 2750 */ 2751 if (scan_inst->src[i].negate || scan_inst->src[i].abs) 2752 continue; 2753 2754 switch (scan_inst->opcode) { 2755 case BRW_OPCODE_MOV: 2756 scan_inst->src[i] = inst->src[0]; 2757 progress = true; 2758 break; 2759 2760 case BRW_OPCODE_MUL: 2761 case BRW_OPCODE_ADD: 2762 if (i == 1) { 2763 scan_inst->src[i] = inst->src[0]; 2764 progress = true; 2765 } else if (i == 0 && scan_inst->src[1].file != IMM) { 2766 /* Fit this constant in by commuting the operands */ 2767 scan_inst->src[0] = scan_inst->src[1]; 2768 scan_inst->src[1] = inst->src[0]; 2769 } 2770 break; 2771 case BRW_OPCODE_CMP: 2772 if (i == 1) { 2773 scan_inst->src[i] = inst->src[0]; 2774 progress = true; 2775 } 2776 } 2777 } 2778 2779 if (scan_inst->dst.file == GRF && 2780 scan_inst->dst.reg == inst->dst.reg && 2781 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2782 scan_inst->opcode == FS_OPCODE_TEX)) { 2783 break; 2784 } 2785 } 2786 } 2787 2788 return progress; 2789} 2790/** 2791 * Must be called after calculate_live_intervales() to remove unused 2792 * writes to registers -- register allocation will fail otherwise 2793 * because something deffed but not used won't be considered to 2794 * interfere with other regs. 2795 */ 2796bool 2797fs_visitor::dead_code_eliminate() 2798{ 2799 bool progress = false; 2800 int num_vars = this->virtual_grf_next; 2801 bool dead[num_vars]; 2802 2803 for (int i = 0; i < num_vars; i++) { 2804 dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i]; 2805 2806 if (dead[i]) { 2807 /* Mark off its interval so it won't interfere with anything. */ 2808 this->virtual_grf_def[i] = -1; 2809 this->virtual_grf_use[i] = -1; 2810 } 2811 } 2812 2813 foreach_iter(exec_list_iterator, iter, this->instructions) { 2814 fs_inst *inst = (fs_inst *)iter.get(); 2815 2816 if (inst->dst.file == GRF && dead[inst->dst.reg]) { 2817 inst->remove(); 2818 progress = true; 2819 } 2820 } 2821 2822 return progress; 2823} 2824 2825bool 2826fs_visitor::register_coalesce() 2827{ 2828 bool progress = false; 2829 2830 foreach_iter(exec_list_iterator, iter, this->instructions) { 2831 fs_inst *inst = (fs_inst *)iter.get(); 2832 2833 if (inst->opcode != BRW_OPCODE_MOV || 2834 inst->predicated || 2835 inst->saturate || 2836 inst->dst.file != GRF || inst->src[0].file != GRF || 2837 inst->dst.type != inst->src[0].type) 2838 continue; 2839 2840 /* Found a move of a GRF to a GRF. Let's see if we can coalesce 2841 * them: check for no writes to either one until the exit of the 2842 * program. 2843 */ 2844 bool interfered = false; 2845 exec_list_iterator scan_iter = iter; 2846 scan_iter.next(); 2847 for (; scan_iter.has_next(); scan_iter.next()) { 2848 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2849 2850 if (scan_inst->opcode == BRW_OPCODE_DO || 2851 scan_inst->opcode == BRW_OPCODE_WHILE || 2852 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2853 interfered = true; 2854 iter = scan_iter; 2855 break; 2856 } 2857 2858 if (scan_inst->dst.file == GRF) { 2859 if (scan_inst->dst.reg == inst->dst.reg && 2860 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2861 scan_inst->opcode == FS_OPCODE_TEX)) { 2862 interfered = true; 2863 break; 2864 } 2865 if (scan_inst->dst.reg == inst->src[0].reg && 2866 (scan_inst->dst.reg_offset == inst->src[0].reg_offset || 2867 scan_inst->opcode == FS_OPCODE_TEX)) { 2868 interfered = true; 2869 break; 2870 } 2871 } 2872 } 2873 if (interfered) { 2874 continue; 2875 } 2876 2877 /* Update live interval so we don't have to recalculate. */ 2878 this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg], 2879 virtual_grf_use[inst->dst.reg]); 2880 2881 /* Rewrite the later usage to point at the source of the move to 2882 * be removed. 2883 */ 2884 for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); 2885 scan_iter.next()) { 2886 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2887 2888 for (int i = 0; i < 3; i++) { 2889 if (scan_inst->src[i].file == GRF && 2890 scan_inst->src[i].reg == inst->dst.reg && 2891 scan_inst->src[i].reg_offset == inst->dst.reg_offset) { 2892 scan_inst->src[i].reg = inst->src[0].reg; 2893 scan_inst->src[i].reg_offset = inst->src[0].reg_offset; 2894 scan_inst->src[i].abs |= inst->src[0].abs; 2895 scan_inst->src[i].negate ^= inst->src[0].negate; 2896 scan_inst->src[i].smear = inst->src[0].smear; 2897 } 2898 } 2899 } 2900 2901 inst->remove(); 2902 progress = true; 2903 } 2904 2905 return progress; 2906} 2907 2908 2909bool 2910fs_visitor::compute_to_mrf() 2911{ 2912 bool progress = false; 2913 int next_ip = 0; 2914 2915 foreach_iter(exec_list_iterator, iter, this->instructions) { 2916 fs_inst *inst = (fs_inst *)iter.get(); 2917 2918 int ip = next_ip; 2919 next_ip++; 2920 2921 if (inst->opcode != BRW_OPCODE_MOV || 2922 inst->predicated || 2923 inst->dst.file != MRF || inst->src[0].file != GRF || 2924 inst->dst.type != inst->src[0].type || 2925 inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1) 2926 continue; 2927 2928 /* Can't compute-to-MRF this GRF if someone else was going to 2929 * read it later. 2930 */ 2931 if (this->virtual_grf_use[inst->src[0].reg] > ip) 2932 continue; 2933 2934 /* Found a move of a GRF to a MRF. Let's see if we can go 2935 * rewrite the thing that made this GRF to write into the MRF. 2936 */ 2937 bool found = false; 2938 fs_inst *scan_inst; 2939 for (scan_inst = (fs_inst *)inst->prev; 2940 scan_inst->prev != NULL; 2941 scan_inst = (fs_inst *)scan_inst->prev) { 2942 /* We don't handle flow control here. Most computation of 2943 * values that end up in MRFs are shortly before the MRF 2944 * write anyway. 2945 */ 2946 if (scan_inst->opcode == BRW_OPCODE_DO || 2947 scan_inst->opcode == BRW_OPCODE_WHILE || 2948 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2949 break; 2950 } 2951 2952 /* You can't read from an MRF, so if someone else reads our 2953 * MRF's source GRF that we wanted to rewrite, that stops us. 2954 */ 2955 bool interfered = false; 2956 for (int i = 0; i < 3; i++) { 2957 if (scan_inst->src[i].file == GRF && 2958 scan_inst->src[i].reg == inst->src[0].reg && 2959 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) { 2960 interfered = true; 2961 } 2962 } 2963 if (interfered) 2964 break; 2965 2966 if (scan_inst->dst.file == MRF && 2967 scan_inst->dst.hw_reg == inst->dst.hw_reg) { 2968 /* Somebody else wrote our MRF here, so we can't can't 2969 * compute-to-MRF before that. 2970 */ 2971 break; 2972 } 2973 2974 if (scan_inst->mlen > 0) { 2975 /* Found a SEND instruction, which will do some amount of 2976 * implied write that may overwrite our MRF that we were 2977 * hoping to compute-to-MRF somewhere above it. Nothing 2978 * we have implied-writes more than 2 MRFs from base_mrf, 2979 * though. 2980 */ 2981 int implied_write_len = MIN2(scan_inst->mlen, 2); 2982 if (inst->dst.hw_reg >= scan_inst->base_mrf && 2983 inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) { 2984 break; 2985 } 2986 } 2987 2988 if (scan_inst->dst.file == GRF && 2989 scan_inst->dst.reg == inst->src[0].reg) { 2990 /* Found the last thing to write our reg we want to turn 2991 * into a compute-to-MRF. 2992 */ 2993 2994 if (scan_inst->opcode == FS_OPCODE_TEX) { 2995 /* texturing writes several continuous regs, so we can't 2996 * compute-to-mrf that. 2997 */ 2998 break; 2999 } 3000 3001 /* If it's predicated, it (probably) didn't populate all 3002 * the channels. 3003 */ 3004 if (scan_inst->predicated) 3005 break; 3006 3007 /* SEND instructions can't have MRF as a destination. */ 3008 if (scan_inst->mlen) 3009 break; 3010 3011 if (intel->gen >= 6) { 3012 /* gen6 math instructions must have the destination be 3013 * GRF, so no compute-to-MRF for them. 3014 */ 3015 if (scan_inst->opcode == FS_OPCODE_RCP || 3016 scan_inst->opcode == FS_OPCODE_RSQ || 3017 scan_inst->opcode == FS_OPCODE_SQRT || 3018 scan_inst->opcode == FS_OPCODE_EXP2 || 3019 scan_inst->opcode == FS_OPCODE_LOG2 || 3020 scan_inst->opcode == FS_OPCODE_SIN || 3021 scan_inst->opcode == FS_OPCODE_COS || 3022 scan_inst->opcode == FS_OPCODE_POW) { 3023 break; 3024 } 3025 } 3026 3027 if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { 3028 /* Found the creator of our MRF's source value. */ 3029 found = true; 3030 break; 3031 } 3032 } 3033 } 3034 if (found) { 3035 scan_inst->dst.file = MRF; 3036 scan_inst->dst.hw_reg = inst->dst.hw_reg; 3037 scan_inst->saturate |= inst->saturate; 3038 inst->remove(); 3039 progress = true; 3040 } 3041 } 3042 3043 return progress; 3044} 3045 3046bool 3047fs_visitor::virtual_grf_interferes(int a, int b) 3048{ 3049 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); 3050 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); 3051 3052 /* For dead code, just check if the def interferes with the other range. */ 3053 if (this->virtual_grf_use[a] == -1) { 3054 return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] && 3055 this->virtual_grf_def[a] < this->virtual_grf_use[b]); 3056 } 3057 if (this->virtual_grf_use[b] == -1) { 3058 return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] && 3059 this->virtual_grf_def[b] < this->virtual_grf_use[a]); 3060 } 3061 3062 return start < end; 3063} 3064 3065static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) 3066{ 3067 struct brw_reg brw_reg; 3068 3069 switch (reg->file) { 3070 case GRF: 3071 case ARF: 3072 case MRF: 3073 if (reg->smear == -1) { 3074 brw_reg = brw_vec8_reg(reg->file, 3075 reg->hw_reg, 0); 3076 } else { 3077 brw_reg = brw_vec1_reg(reg->file, 3078 reg->hw_reg, reg->smear); 3079 } 3080 brw_reg = retype(brw_reg, reg->type); 3081 break; 3082 case IMM: 3083 switch (reg->type) { 3084 case BRW_REGISTER_TYPE_F: 3085 brw_reg = brw_imm_f(reg->imm.f); 3086 break; 3087 case BRW_REGISTER_TYPE_D: 3088 brw_reg = brw_imm_d(reg->imm.i); 3089 break; 3090 case BRW_REGISTER_TYPE_UD: 3091 brw_reg = brw_imm_ud(reg->imm.u); 3092 break; 3093 default: 3094 assert(!"not reached"); 3095 break; 3096 } 3097 break; 3098 case FIXED_HW_REG: 3099 brw_reg = reg->fixed_hw_reg; 3100 break; 3101 case BAD_FILE: 3102 /* Probably unused. */ 3103 brw_reg = brw_null_reg(); 3104 break; 3105 case UNIFORM: 3106 assert(!"not reached"); 3107 brw_reg = brw_null_reg(); 3108 break; 3109 } 3110 if (reg->abs) 3111 brw_reg = brw_abs(brw_reg); 3112 if (reg->negate) 3113 brw_reg = negate(brw_reg); 3114 3115 return brw_reg; 3116} 3117 3118void 3119fs_visitor::generate_code() 3120{ 3121 int last_native_inst = 0; 3122 struct brw_instruction *if_stack[16], *loop_stack[16]; 3123 int if_stack_depth = 0, loop_stack_depth = 0; 3124 int if_depth_in_loop[16]; 3125 const char *last_annotation_string = NULL; 3126 ir_instruction *last_annotation_ir = NULL; 3127 3128 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3129 printf("Native code for fragment shader %d:\n", 3130 ctx->Shader.CurrentFragmentProgram->Name); 3131 } 3132 3133 if_depth_in_loop[loop_stack_depth] = 0; 3134 3135 memset(&if_stack, 0, sizeof(if_stack)); 3136 foreach_iter(exec_list_iterator, iter, this->instructions) { 3137 fs_inst *inst = (fs_inst *)iter.get(); 3138 struct brw_reg src[3], dst; 3139 3140 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3141 if (last_annotation_ir != inst->ir) { 3142 last_annotation_ir = inst->ir; 3143 if (last_annotation_ir) { 3144 printf(" "); 3145 last_annotation_ir->print(); 3146 printf("\n"); 3147 } 3148 } 3149 if (last_annotation_string != inst->annotation) { 3150 last_annotation_string = inst->annotation; 3151 if (last_annotation_string) 3152 printf(" %s\n", last_annotation_string); 3153 } 3154 } 3155 3156 for (unsigned int i = 0; i < 3; i++) { 3157 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 3158 } 3159 dst = brw_reg_from_fs_reg(&inst->dst); 3160 3161 brw_set_conditionalmod(p, inst->conditional_mod); 3162 brw_set_predicate_control(p, inst->predicated); 3163 3164 switch (inst->opcode) { 3165 case BRW_OPCODE_MOV: 3166 brw_MOV(p, dst, src[0]); 3167 break; 3168 case BRW_OPCODE_ADD: 3169 brw_ADD(p, dst, src[0], src[1]); 3170 break; 3171 case BRW_OPCODE_MUL: 3172 brw_MUL(p, dst, src[0], src[1]); 3173 break; 3174 3175 case BRW_OPCODE_FRC: 3176 brw_FRC(p, dst, src[0]); 3177 break; 3178 case BRW_OPCODE_RNDD: 3179 brw_RNDD(p, dst, src[0]); 3180 break; 3181 case BRW_OPCODE_RNDE: 3182 brw_RNDE(p, dst, src[0]); 3183 break; 3184 case BRW_OPCODE_RNDZ: 3185 brw_RNDZ(p, dst, src[0]); 3186 break; 3187 3188 case BRW_OPCODE_AND: 3189 brw_AND(p, dst, src[0], src[1]); 3190 break; 3191 case BRW_OPCODE_OR: 3192 brw_OR(p, dst, src[0], src[1]); 3193 break; 3194 case BRW_OPCODE_XOR: 3195 brw_XOR(p, dst, src[0], src[1]); 3196 break; 3197 case BRW_OPCODE_NOT: 3198 brw_NOT(p, dst, src[0]); 3199 break; 3200 case BRW_OPCODE_ASR: 3201 brw_ASR(p, dst, src[0], src[1]); 3202 break; 3203 case BRW_OPCODE_SHR: 3204 brw_SHR(p, dst, src[0], src[1]); 3205 break; 3206 case BRW_OPCODE_SHL: 3207 brw_SHL(p, dst, src[0], src[1]); 3208 break; 3209 3210 case BRW_OPCODE_CMP: 3211 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 3212 break; 3213 case BRW_OPCODE_SEL: 3214 brw_SEL(p, dst, src[0], src[1]); 3215 break; 3216 3217 case BRW_OPCODE_IF: 3218 assert(if_stack_depth < 16); 3219 if (inst->src[0].file != BAD_FILE) { 3220 assert(intel->gen >= 6); 3221 if_stack[if_stack_depth] = brw_IF_gen6(p, inst->conditional_mod, src[0], src[1]); 3222 } else { 3223 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 3224 } 3225 if_depth_in_loop[loop_stack_depth]++; 3226 if_stack_depth++; 3227 break; 3228 3229 case BRW_OPCODE_ELSE: 3230 if_stack[if_stack_depth - 1] = 3231 brw_ELSE(p, if_stack[if_stack_depth - 1]); 3232 break; 3233 case BRW_OPCODE_ENDIF: 3234 if_stack_depth--; 3235 brw_ENDIF(p , if_stack[if_stack_depth]); 3236 if_depth_in_loop[loop_stack_depth]--; 3237 break; 3238 3239 case BRW_OPCODE_DO: 3240 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 3241 if_depth_in_loop[loop_stack_depth] = 0; 3242 break; 3243 3244 case BRW_OPCODE_BREAK: 3245 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 3246 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 3247 break; 3248 case BRW_OPCODE_CONTINUE: 3249 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 3250 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 3251 break; 3252 3253 case BRW_OPCODE_WHILE: { 3254 struct brw_instruction *inst0, *inst1; 3255 GLuint br = 1; 3256 3257 if (intel->gen >= 5) 3258 br = 2; 3259 3260 assert(loop_stack_depth > 0); 3261 loop_stack_depth--; 3262 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 3263 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 3264 while (inst0 > loop_stack[loop_stack_depth]) { 3265 inst0--; 3266 if (inst0->header.opcode == BRW_OPCODE_BREAK && 3267 inst0->bits3.if_else.jump_count == 0) { 3268 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 3269 } 3270 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 3271 inst0->bits3.if_else.jump_count == 0) { 3272 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 3273 } 3274 } 3275 } 3276 break; 3277 3278 case FS_OPCODE_RCP: 3279 case FS_OPCODE_RSQ: 3280 case FS_OPCODE_SQRT: 3281 case FS_OPCODE_EXP2: 3282 case FS_OPCODE_LOG2: 3283 case FS_OPCODE_POW: 3284 case FS_OPCODE_SIN: 3285 case FS_OPCODE_COS: 3286 generate_math(inst, dst, src); 3287 break; 3288 case FS_OPCODE_LINTERP: 3289 generate_linterp(inst, dst, src); 3290 break; 3291 case FS_OPCODE_TEX: 3292 case FS_OPCODE_TXB: 3293 case FS_OPCODE_TXL: 3294 generate_tex(inst, dst); 3295 break; 3296 case FS_OPCODE_DISCARD_NOT: 3297 generate_discard_not(inst, dst); 3298 break; 3299 case FS_OPCODE_DISCARD_AND: 3300 generate_discard_and(inst, src[0]); 3301 break; 3302 case FS_OPCODE_DDX: 3303 generate_ddx(inst, dst, src[0]); 3304 break; 3305 case FS_OPCODE_DDY: 3306 generate_ddy(inst, dst, src[0]); 3307 break; 3308 3309 case FS_OPCODE_SPILL: 3310 generate_spill(inst, src[0]); 3311 break; 3312 3313 case FS_OPCODE_UNSPILL: 3314 generate_unspill(inst, dst); 3315 break; 3316 3317 case FS_OPCODE_PULL_CONSTANT_LOAD: 3318 generate_pull_constant_load(inst, dst); 3319 break; 3320 3321 case FS_OPCODE_FB_WRITE: 3322 generate_fb_write(inst); 3323 break; 3324 default: 3325 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 3326 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 3327 brw_opcodes[inst->opcode].name); 3328 } else { 3329 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 3330 } 3331 this->fail = true; 3332 } 3333 3334 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3335 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 3336 if (0) { 3337 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 3338 ((uint32_t *)&p->store[i])[3], 3339 ((uint32_t *)&p->store[i])[2], 3340 ((uint32_t *)&p->store[i])[1], 3341 ((uint32_t *)&p->store[i])[0]); 3342 } 3343 brw_disasm(stdout, &p->store[i], intel->gen); 3344 printf("\n"); 3345 } 3346 } 3347 3348 last_native_inst = p->nr_insn; 3349 } 3350} 3351 3352GLboolean 3353brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 3354{ 3355 struct intel_context *intel = &brw->intel; 3356 struct gl_context *ctx = &intel->ctx; 3357 struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; 3358 3359 if (!prog) 3360 return GL_FALSE; 3361 3362 struct brw_shader *shader = 3363 (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 3364 if (!shader) 3365 return GL_FALSE; 3366 3367 /* We always use 8-wide mode, at least for now. For one, flow 3368 * control only works in 8-wide. Also, when we're fragment shader 3369 * bound, we're almost always under register pressure as well, so 3370 * 8-wide would save us from the performance cliff of spilling 3371 * regs. 3372 */ 3373 c->dispatch_width = 8; 3374 3375 if (unlikely(INTEL_DEBUG & DEBUG_WM)) { 3376 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 3377 _mesa_print_ir(shader->ir, NULL); 3378 printf("\n"); 3379 } 3380 3381 /* Now the main event: Visit the shader IR and generate our FS IR for it. 3382 */ 3383 fs_visitor v(c, shader); 3384 3385 if (0) { 3386 v.emit_dummy_fs(); 3387 } else { 3388 v.calculate_urb_setup(); 3389 if (intel->gen < 6) 3390 v.emit_interpolation_setup_gen4(); 3391 else 3392 v.emit_interpolation_setup_gen6(); 3393 3394 /* Generate FS IR for main(). (the visitor only descends into 3395 * functions called "main"). 3396 */ 3397 foreach_iter(exec_list_iterator, iter, *shader->ir) { 3398 ir_instruction *ir = (ir_instruction *)iter.get(); 3399 v.base_ir = ir; 3400 ir->accept(&v); 3401 } 3402 3403 v.emit_fb_writes(); 3404 3405 v.split_virtual_grfs(); 3406 v.setup_pull_constants(); 3407 3408 v.assign_curb_setup(); 3409 v.assign_urb_setup(); 3410 3411 bool progress; 3412 do { 3413 progress = false; 3414 v.calculate_live_intervals(); 3415 progress = v.propagate_constants() || progress; 3416 progress = v.register_coalesce() || progress; 3417 progress = v.compute_to_mrf() || progress; 3418 progress = v.dead_code_eliminate() || progress; 3419 } while (progress); 3420 3421 if (0) { 3422 /* Debug of register spilling: Go spill everything. */ 3423 int virtual_grf_count = v.virtual_grf_next; 3424 for (int i = 1; i < virtual_grf_count; i++) { 3425 v.spill_reg(i); 3426 } 3427 v.calculate_live_intervals(); 3428 } 3429 3430 if (0) 3431 v.assign_regs_trivial(); 3432 else { 3433 while (!v.assign_regs()) { 3434 if (v.fail) 3435 break; 3436 3437 v.calculate_live_intervals(); 3438 } 3439 } 3440 } 3441 3442 if (!v.fail) 3443 v.generate_code(); 3444 3445 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */ 3446 3447 if (v.fail) 3448 return GL_FALSE; 3449 3450 c->prog_data.total_grf = v.grf_used; 3451 3452 return GL_TRUE; 3453} 3454