/* brw_fs.cpp revision 62452e7d94a6353b59dfe0a8891d0709670dbeac */
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44#include "talloc.h" 45} 46#include "brw_fs.h" 47#include "../glsl/glsl_types.h" 48#include "../glsl/ir_optimization.h" 49#include "../glsl/ir_print_visitor.h" 50 51static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 52 53struct gl_shader * 54brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 55{ 56 struct brw_shader *shader; 57 58 shader = talloc_zero(NULL, struct brw_shader); 59 if (shader) { 60 shader->base.Type = type; 61 shader->base.Name = name; 62 _mesa_init_shader(ctx, &shader->base); 63 } 64 65 return &shader->base; 66} 67 68struct gl_shader_program * 69brw_new_shader_program(struct gl_context *ctx, GLuint name) 70{ 71 struct brw_shader_program *prog; 72 prog = talloc_zero(NULL, struct brw_shader_program); 73 if (prog) { 74 prog->base.Name = name; 75 _mesa_init_shader_program(ctx, &prog->base); 76 } 77 return &prog->base; 78} 79 80GLboolean 81brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) 82{ 83 if (!_mesa_ir_compile_shader(ctx, shader)) 84 return GL_FALSE; 85 86 return GL_TRUE; 87} 88 89GLboolean 90brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 91{ 92 struct intel_context *intel = intel_context(ctx); 93 94 struct brw_shader *shader = 95 (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; 96 if (shader != NULL) { 97 void *mem_ctx = talloc_new(NULL); 98 bool progress; 99 100 if (shader->ir) 101 talloc_free(shader->ir); 102 shader->ir = new(shader) exec_list; 103 clone_ir_list(mem_ctx, shader->ir, 
shader->base.ir); 104 105 do_mat_op_to_vec(shader->ir); 106 do_mod_to_fract(shader->ir); 107 do_div_to_mul_rcp(shader->ir); 108 do_sub_to_add_neg(shader->ir); 109 do_explog_to_explog2(shader->ir); 110 do_lower_texture_projection(shader->ir); 111 brw_do_cubemap_normalize(shader->ir); 112 113 do { 114 progress = false; 115 116 brw_do_channel_expressions(shader->ir); 117 brw_do_vector_splitting(shader->ir); 118 119 progress = do_lower_jumps(shader->ir, true, true, 120 true, /* main return */ 121 false, /* continue */ 122 false /* loops */ 123 ) || progress; 124 125 progress = do_common_optimization(shader->ir, true, 32) || progress; 126 127 progress = lower_noise(shader->ir) || progress; 128 progress = 129 lower_variable_index_to_cond_assign(shader->ir, 130 GL_TRUE, /* input */ 131 GL_TRUE, /* output */ 132 GL_TRUE, /* temp */ 133 GL_TRUE /* uniform */ 134 ) || progress; 135 if (intel->gen == 6) { 136 progress = do_if_to_cond_assign(shader->ir) || progress; 137 } 138 } while (progress); 139 140 validate_ir_tree(shader->ir); 141 142 reparent_ir(shader->ir, shader->ir); 143 talloc_free(mem_ctx); 144 } 145 146 if (!_mesa_ir_link_shader(ctx, prog)) 147 return GL_FALSE; 148 149 return GL_TRUE; 150} 151 152static int 153type_size(const struct glsl_type *type) 154{ 155 unsigned int size, i; 156 157 switch (type->base_type) { 158 case GLSL_TYPE_UINT: 159 case GLSL_TYPE_INT: 160 case GLSL_TYPE_FLOAT: 161 case GLSL_TYPE_BOOL: 162 return type->components(); 163 case GLSL_TYPE_ARRAY: 164 return type_size(type->fields.array) * type->length; 165 case GLSL_TYPE_STRUCT: 166 size = 0; 167 for (i = 0; i < type->length; i++) { 168 size += type_size(type->fields.structure[i].type); 169 } 170 return size; 171 case GLSL_TYPE_SAMPLER: 172 /* Samplers take up no register space, since they're baked in at 173 * link time. 
174 */ 175 return 0; 176 default: 177 assert(!"not reached"); 178 return 0; 179 } 180} 181 182int 183fs_visitor::virtual_grf_alloc(int size) 184{ 185 if (virtual_grf_array_size <= virtual_grf_next) { 186 if (virtual_grf_array_size == 0) 187 virtual_grf_array_size = 16; 188 else 189 virtual_grf_array_size *= 2; 190 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, 191 int, virtual_grf_array_size); 192 193 /* This slot is always unused. */ 194 virtual_grf_sizes[0] = 0; 195 } 196 virtual_grf_sizes[virtual_grf_next] = size; 197 return virtual_grf_next++; 198} 199 200/** Fixed HW reg constructor. */ 201fs_reg::fs_reg(enum register_file file, int hw_reg) 202{ 203 init(); 204 this->file = file; 205 this->hw_reg = hw_reg; 206 this->type = BRW_REGISTER_TYPE_F; 207} 208 209/** Fixed HW reg constructor. */ 210fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) 211{ 212 init(); 213 this->file = file; 214 this->hw_reg = hw_reg; 215 this->type = type; 216} 217 218int 219brw_type_for_base_type(const struct glsl_type *type) 220{ 221 switch (type->base_type) { 222 case GLSL_TYPE_FLOAT: 223 return BRW_REGISTER_TYPE_F; 224 case GLSL_TYPE_INT: 225 case GLSL_TYPE_BOOL: 226 return BRW_REGISTER_TYPE_D; 227 case GLSL_TYPE_UINT: 228 return BRW_REGISTER_TYPE_UD; 229 case GLSL_TYPE_ARRAY: 230 case GLSL_TYPE_STRUCT: 231 /* These should be overridden with the type of the member when 232 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 233 * way to trip up if we don't. 234 */ 235 return BRW_REGISTER_TYPE_UD; 236 default: 237 assert(!"not reached"); 238 return BRW_REGISTER_TYPE_F; 239 } 240} 241 242/** Automatic reg constructor. 
*/ 243fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 244{ 245 init(); 246 247 this->file = GRF; 248 this->reg = v->virtual_grf_alloc(type_size(type)); 249 this->reg_offset = 0; 250 this->type = brw_type_for_base_type(type); 251} 252 253fs_reg * 254fs_visitor::variable_storage(ir_variable *var) 255{ 256 return (fs_reg *)hash_table_find(this->variable_ht, var); 257} 258 259/* Our support for uniforms is piggy-backed on the struct 260 * gl_fragment_program, because that's where the values actually 261 * get stored, rather than in some global gl_shader_program uniform 262 * store. 263 */ 264int 265fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 266{ 267 unsigned int offset = 0; 268 float *vec_values; 269 270 if (type->is_matrix()) { 271 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 272 type->vector_elements, 273 1); 274 275 for (unsigned int i = 0; i < type->matrix_columns; i++) { 276 offset += setup_uniform_values(loc + offset, column); 277 } 278 279 return offset; 280 } 281 282 switch (type->base_type) { 283 case GLSL_TYPE_FLOAT: 284 case GLSL_TYPE_UINT: 285 case GLSL_TYPE_INT: 286 case GLSL_TYPE_BOOL: 287 vec_values = fp->Base.Parameters->ParameterValues[loc]; 288 for (unsigned int i = 0; i < type->vector_elements; i++) { 289 assert(c->prog_data.nr_params < ARRAY_SIZE(c->prog_data.param)); 290 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 291 } 292 return 1; 293 294 case GLSL_TYPE_STRUCT: 295 for (unsigned int i = 0; i < type->length; i++) { 296 offset += setup_uniform_values(loc + offset, 297 type->fields.structure[i].type); 298 } 299 return offset; 300 301 case GLSL_TYPE_ARRAY: 302 for (unsigned int i = 0; i < type->length; i++) { 303 offset += setup_uniform_values(loc + offset, type->fields.array); 304 } 305 return offset; 306 307 case GLSL_TYPE_SAMPLER: 308 /* The sampler takes up a slot, but we don't use any values from it. 
*/ 309 return 1; 310 311 default: 312 assert(!"not reached"); 313 return 0; 314 } 315} 316 317 318/* Our support for builtin uniforms is even scarier than non-builtin. 319 * It sits on top of the PROG_STATE_VAR parameters that are 320 * automatically updated from GL context state. 321 */ 322void 323fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 324{ 325 const struct gl_builtin_uniform_desc *statevar = NULL; 326 327 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 328 statevar = &_mesa_builtin_uniform_desc[i]; 329 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 330 break; 331 } 332 333 if (!statevar->name) { 334 this->fail = true; 335 printf("Failed to find builtin uniform `%s'\n", ir->name); 336 return; 337 } 338 339 int array_count; 340 if (ir->type->is_array()) { 341 array_count = ir->type->length; 342 } else { 343 array_count = 1; 344 } 345 346 for (int a = 0; a < array_count; a++) { 347 for (unsigned int i = 0; i < statevar->num_elements; i++) { 348 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 349 int tokens[STATE_LENGTH]; 350 351 memcpy(tokens, element->tokens, sizeof(element->tokens)); 352 if (ir->type->is_array()) { 353 tokens[1] = a; 354 } 355 356 /* This state reference has already been setup by ir_to_mesa, 357 * but we'll get the same index back here. 358 */ 359 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 360 (gl_state_index *)tokens); 361 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 362 363 /* Add each of the unique swizzles of the element as a 364 * parameter. This'll end up matching the expected layout of 365 * the array/matrix/structure we're trying to fill in. 
366 */ 367 int last_swiz = -1; 368 for (unsigned int i = 0; i < 4; i++) { 369 int swiz = GET_SWZ(element->swizzle, i); 370 if (swiz == last_swiz) 371 break; 372 last_swiz = swiz; 373 374 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; 375 } 376 } 377 } 378} 379 380fs_reg * 381fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 382{ 383 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 384 fs_reg wpos = *reg; 385 fs_reg neg_y = this->pixel_y; 386 neg_y.negate = true; 387 388 /* gl_FragCoord.x */ 389 if (ir->pixel_center_integer) { 390 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 391 } else { 392 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 393 } 394 wpos.reg_offset++; 395 396 /* gl_FragCoord.y */ 397 if (ir->origin_upper_left && ir->pixel_center_integer) { 398 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 399 } else { 400 fs_reg pixel_y = this->pixel_y; 401 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 402 403 if (!ir->origin_upper_left) { 404 pixel_y.negate = true; 405 offset += c->key.drawable_height - 1.0; 406 } 407 408 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 409 } 410 wpos.reg_offset++; 411 412 /* gl_FragCoord.z */ 413 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 414 interp_reg(FRAG_ATTRIB_WPOS, 2))); 415 wpos.reg_offset++; 416 417 /* gl_FragCoord.w: Already set up in emit_interpolation */ 418 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 419 420 return reg; 421} 422 423fs_reg * 424fs_visitor::emit_general_interpolation(ir_variable *ir) 425{ 426 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 427 /* Interpolation is always in floating point regs. 
*/ 428 reg->type = BRW_REGISTER_TYPE_F; 429 fs_reg attr = *reg; 430 431 unsigned int array_elements; 432 const glsl_type *type; 433 434 if (ir->type->is_array()) { 435 array_elements = ir->type->length; 436 if (array_elements == 0) { 437 this->fail = true; 438 } 439 type = ir->type->fields.array; 440 } else { 441 array_elements = 1; 442 type = ir->type; 443 } 444 445 int location = ir->location; 446 for (unsigned int i = 0; i < array_elements; i++) { 447 for (unsigned int j = 0; j < type->matrix_columns; j++) { 448 if (urb_setup[location] == -1) { 449 /* If there's no incoming setup data for this slot, don't 450 * emit interpolation for it. 451 */ 452 attr.reg_offset += type->vector_elements; 453 location++; 454 continue; 455 } 456 457 for (unsigned int c = 0; c < type->vector_elements; c++) { 458 struct brw_reg interp = interp_reg(location, c); 459 emit(fs_inst(FS_OPCODE_LINTERP, 460 attr, 461 this->delta_x, 462 this->delta_y, 463 fs_reg(interp))); 464 attr.reg_offset++; 465 } 466 467 if (intel->gen < 6) { 468 attr.reg_offset -= type->vector_elements; 469 for (unsigned int c = 0; c < type->vector_elements; c++) { 470 emit(fs_inst(BRW_OPCODE_MUL, 471 attr, 472 attr, 473 this->pixel_w)); 474 attr.reg_offset++; 475 } 476 } 477 location++; 478 } 479 } 480 481 return reg; 482} 483 484fs_reg * 485fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) 486{ 487 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 488 489 /* The frontfacing comes in as a bit in the thread payload. 
*/ 490 if (intel->gen >= 6) { 491 emit(fs_inst(BRW_OPCODE_ASR, 492 *reg, 493 fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), 494 fs_reg(15))); 495 emit(fs_inst(BRW_OPCODE_NOT, 496 *reg, 497 *reg)); 498 emit(fs_inst(BRW_OPCODE_AND, 499 *reg, 500 *reg, 501 fs_reg(1))); 502 } else { 503 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 504 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 505 * us front face 506 */ 507 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 508 *reg, 509 fs_reg(r1_6ud), 510 fs_reg(1u << 31))); 511 inst->conditional_mod = BRW_CONDITIONAL_L; 512 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 513 } 514 515 return reg; 516} 517 518fs_inst * 519fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) 520{ 521 switch (opcode) { 522 case FS_OPCODE_RCP: 523 case FS_OPCODE_RSQ: 524 case FS_OPCODE_SQRT: 525 case FS_OPCODE_EXP2: 526 case FS_OPCODE_LOG2: 527 case FS_OPCODE_SIN: 528 case FS_OPCODE_COS: 529 break; 530 default: 531 assert(!"not reached: bad math opcode"); 532 return NULL; 533 } 534 535 /* Can't do hstride == 0 args to gen6 math, so expand it out. We 536 * might be able to do better by doing execsize = 1 math and then 537 * expanding that result out, but we would need to be careful with 538 * masking. 539 */ 540 if (intel->gen >= 6 && src.file == UNIFORM) { 541 fs_reg expanded = fs_reg(this, glsl_type::float_type); 542 emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); 543 src = expanded; 544 } 545 546 fs_inst *inst = emit(fs_inst(opcode, dst, src)); 547 548 if (intel->gen < 6) { 549 inst->base_mrf = 2; 550 inst->mlen = 1; 551 } 552 553 return inst; 554} 555 556fs_inst * 557fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) 558{ 559 int base_mrf = 2; 560 fs_inst *inst; 561 562 assert(opcode == FS_OPCODE_POW); 563 564 if (intel->gen >= 6) { 565 /* Can't do hstride == 0 args to gen6 math, so expand it out. 
*/ 566 if (src0.file == UNIFORM) { 567 fs_reg expanded = fs_reg(this, glsl_type::float_type); 568 emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); 569 src0 = expanded; 570 } 571 572 if (src1.file == UNIFORM) { 573 fs_reg expanded = fs_reg(this, glsl_type::float_type); 574 emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); 575 src1 = expanded; 576 } 577 578 inst = emit(fs_inst(opcode, dst, src0, src1)); 579 } else { 580 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1)); 581 inst = emit(fs_inst(opcode, dst, src0, reg_null_f)); 582 583 inst->base_mrf = base_mrf; 584 inst->mlen = 2; 585 } 586 return inst; 587} 588 589void 590fs_visitor::visit(ir_variable *ir) 591{ 592 fs_reg *reg = NULL; 593 594 if (variable_storage(ir)) 595 return; 596 597 if (strcmp(ir->name, "gl_FragColor") == 0) { 598 this->frag_color = ir; 599 } else if (strcmp(ir->name, "gl_FragData") == 0) { 600 this->frag_data = ir; 601 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 602 this->frag_depth = ir; 603 } 604 605 if (ir->mode == ir_var_in) { 606 if (!strcmp(ir->name, "gl_FragCoord")) { 607 reg = emit_fragcoord_interpolation(ir); 608 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 609 reg = emit_frontfacing_interpolation(ir); 610 } else { 611 reg = emit_general_interpolation(ir); 612 } 613 assert(reg); 614 hash_table_insert(this->variable_ht, reg, ir); 615 return; 616 } 617 618 if (ir->mode == ir_var_uniform) { 619 int param_index = c->prog_data.nr_params; 620 621 if (!strncmp(ir->name, "gl_", 3)) { 622 setup_builtin_uniform_values(ir); 623 } else { 624 setup_uniform_values(ir->location, ir->type); 625 } 626 627 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 628 } 629 630 if (!reg) 631 reg = new(this->mem_ctx) fs_reg(this, ir->type); 632 633 hash_table_insert(this->variable_ht, reg, ir); 634} 635 636void 637fs_visitor::visit(ir_dereference_variable *ir) 638{ 639 fs_reg *reg = variable_storage(ir->var); 640 this->result = *reg; 641} 642 643void 
644fs_visitor::visit(ir_dereference_record *ir) 645{ 646 const glsl_type *struct_type = ir->record->type; 647 648 ir->record->accept(this); 649 650 unsigned int offset = 0; 651 for (unsigned int i = 0; i < struct_type->length; i++) { 652 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 653 break; 654 offset += type_size(struct_type->fields.structure[i].type); 655 } 656 this->result.reg_offset += offset; 657 this->result.type = brw_type_for_base_type(ir->type); 658} 659 660void 661fs_visitor::visit(ir_dereference_array *ir) 662{ 663 ir_constant *index; 664 int element_size; 665 666 ir->array->accept(this); 667 index = ir->array_index->as_constant(); 668 669 element_size = type_size(ir->type); 670 this->result.type = brw_type_for_base_type(ir->type); 671 672 if (index) { 673 assert(this->result.file == UNIFORM || 674 (this->result.file == GRF && 675 this->result.reg != 0)); 676 this->result.reg_offset += index->value.i[0] * element_size; 677 } else { 678 assert(!"FINISHME: non-constant array element"); 679 } 680} 681 682void 683fs_visitor::visit(ir_expression *ir) 684{ 685 unsigned int operand; 686 fs_reg op[2], temp; 687 fs_inst *inst; 688 689 for (operand = 0; operand < ir->get_num_operands(); operand++) { 690 ir->operands[operand]->accept(this); 691 if (this->result.file == BAD_FILE) { 692 ir_print_visitor v; 693 printf("Failed to get tree for expression operand:\n"); 694 ir->operands[operand]->accept(&v); 695 this->fail = true; 696 } 697 op[operand] = this->result; 698 699 /* Matrix expression operands should have been broken down to vector 700 * operations already. 701 */ 702 assert(!ir->operands[operand]->type->is_matrix()); 703 /* And then those vector operands should have been broken down to scalar. 704 */ 705 assert(!ir->operands[operand]->type->is_vector()); 706 } 707 708 /* Storage for our result. If our result goes into an assignment, it will 709 * just get copy-propagated out, so no worries. 
710 */ 711 this->result = fs_reg(this, ir->type); 712 713 switch (ir->operation) { 714 case ir_unop_logic_not: 715 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 716 * ones complement of the whole register, not just bit 0. 717 */ 718 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1))); 719 break; 720 case ir_unop_neg: 721 op[0].negate = !op[0].negate; 722 this->result = op[0]; 723 break; 724 case ir_unop_abs: 725 op[0].abs = true; 726 this->result = op[0]; 727 break; 728 case ir_unop_sign: 729 temp = fs_reg(this, ir->type); 730 731 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 732 733 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 734 inst->conditional_mod = BRW_CONDITIONAL_G; 735 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 736 inst->predicated = true; 737 738 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); 739 inst->conditional_mod = BRW_CONDITIONAL_L; 740 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 741 inst->predicated = true; 742 743 break; 744 case ir_unop_rcp: 745 emit_math(FS_OPCODE_RCP, this->result, op[0]); 746 break; 747 748 case ir_unop_exp2: 749 emit_math(FS_OPCODE_EXP2, this->result, op[0]); 750 break; 751 case ir_unop_log2: 752 emit_math(FS_OPCODE_LOG2, this->result, op[0]); 753 break; 754 case ir_unop_exp: 755 case ir_unop_log: 756 assert(!"not reached: should be handled by ir_explog_to_explog2"); 757 break; 758 case ir_unop_sin: 759 emit_math(FS_OPCODE_SIN, this->result, op[0]); 760 break; 761 case ir_unop_cos: 762 emit_math(FS_OPCODE_COS, this->result, op[0]); 763 break; 764 765 case ir_unop_dFdx: 766 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 767 break; 768 case ir_unop_dFdy: 769 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 770 break; 771 772 case ir_binop_add: 773 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 774 break; 775 case ir_binop_sub: 776 assert(!"not reached: should be handled by 
ir_sub_to_add_neg"); 777 break; 778 779 case ir_binop_mul: 780 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 781 break; 782 case ir_binop_div: 783 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 784 break; 785 case ir_binop_mod: 786 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 787 break; 788 789 case ir_binop_less: 790 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 791 inst->conditional_mod = BRW_CONDITIONAL_L; 792 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 793 break; 794 case ir_binop_greater: 795 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 796 inst->conditional_mod = BRW_CONDITIONAL_G; 797 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 798 break; 799 case ir_binop_lequal: 800 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 801 inst->conditional_mod = BRW_CONDITIONAL_LE; 802 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 803 break; 804 case ir_binop_gequal: 805 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 806 inst->conditional_mod = BRW_CONDITIONAL_GE; 807 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 808 break; 809 case ir_binop_equal: 810 case ir_binop_all_equal: /* same as nequal for scalars */ 811 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 812 inst->conditional_mod = BRW_CONDITIONAL_Z; 813 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 814 break; 815 case ir_binop_nequal: 816 case ir_binop_any_nequal: /* same as nequal for scalars */ 817 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 818 inst->conditional_mod = BRW_CONDITIONAL_NZ; 819 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 820 break; 821 822 case ir_binop_logic_xor: 823 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 824 break; 825 826 case ir_binop_logic_or: 827 
emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 828 break; 829 830 case ir_binop_logic_and: 831 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 832 break; 833 834 case ir_binop_dot: 835 case ir_binop_cross: 836 case ir_unop_any: 837 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 838 break; 839 840 case ir_unop_noise: 841 assert(!"not reached: should be handled by lower_noise"); 842 break; 843 844 case ir_unop_sqrt: 845 emit_math(FS_OPCODE_SQRT, this->result, op[0]); 846 break; 847 848 case ir_unop_rsq: 849 emit_math(FS_OPCODE_RSQ, this->result, op[0]); 850 break; 851 852 case ir_unop_i2f: 853 case ir_unop_b2f: 854 case ir_unop_b2i: 855 case ir_unop_f2i: 856 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 857 break; 858 case ir_unop_f2b: 859 case ir_unop_i2b: 860 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 861 inst->conditional_mod = BRW_CONDITIONAL_NZ; 862 inst = emit(fs_inst(BRW_OPCODE_AND, this->result, 863 this->result, fs_reg(1))); 864 break; 865 866 case ir_unop_trunc: 867 emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0])); 868 break; 869 case ir_unop_ceil: 870 op[0].negate = !op[0].negate; 871 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 872 this->result.negate = true; 873 break; 874 case ir_unop_floor: 875 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 876 break; 877 case ir_unop_fract: 878 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 879 break; 880 case ir_unop_round_even: 881 emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0])); 882 break; 883 884 case ir_binop_min: 885 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 886 inst->conditional_mod = BRW_CONDITIONAL_L; 887 888 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 889 inst->predicated = true; 890 break; 891 case ir_binop_max: 892 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 893 inst->conditional_mod = BRW_CONDITIONAL_G; 894 
895 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 896 inst->predicated = true; 897 break; 898 899 case ir_binop_pow: 900 emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); 901 break; 902 903 case ir_unop_bit_not: 904 case ir_unop_u2f: 905 case ir_binop_lshift: 906 case ir_binop_rshift: 907 case ir_binop_bit_and: 908 case ir_binop_bit_xor: 909 case ir_binop_bit_or: 910 assert(!"GLSL 1.30 features unsupported"); 911 break; 912 } 913} 914 915void 916fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 917 const glsl_type *type, bool predicated) 918{ 919 switch (type->base_type) { 920 case GLSL_TYPE_FLOAT: 921 case GLSL_TYPE_UINT: 922 case GLSL_TYPE_INT: 923 case GLSL_TYPE_BOOL: 924 for (unsigned int i = 0; i < type->components(); i++) { 925 l.type = brw_type_for_base_type(type); 926 r.type = brw_type_for_base_type(type); 927 928 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 929 inst->predicated = predicated; 930 931 l.reg_offset++; 932 r.reg_offset++; 933 } 934 break; 935 case GLSL_TYPE_ARRAY: 936 for (unsigned int i = 0; i < type->length; i++) { 937 emit_assignment_writes(l, r, type->fields.array, predicated); 938 } 939 break; 940 941 case GLSL_TYPE_STRUCT: 942 for (unsigned int i = 0; i < type->length; i++) { 943 emit_assignment_writes(l, r, type->fields.structure[i].type, 944 predicated); 945 } 946 break; 947 948 case GLSL_TYPE_SAMPLER: 949 break; 950 951 default: 952 assert(!"not reached"); 953 break; 954 } 955} 956 957void 958fs_visitor::visit(ir_assignment *ir) 959{ 960 struct fs_reg l, r; 961 fs_inst *inst; 962 963 /* FINISHME: arrays on the lhs */ 964 ir->lhs->accept(this); 965 l = this->result; 966 967 ir->rhs->accept(this); 968 r = this->result; 969 970 assert(l.file != BAD_FILE); 971 assert(r.file != BAD_FILE); 972 973 if (ir->condition) { 974 emit_bool_to_cond_code(ir->condition); 975 } 976 977 if (ir->lhs->type->is_scalar() || 978 ir->lhs->type->is_vector()) { 979 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 
980 if (ir->write_mask & (1 << i)) { 981 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 982 if (ir->condition) 983 inst->predicated = true; 984 r.reg_offset++; 985 } 986 l.reg_offset++; 987 } 988 } else { 989 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 990 } 991} 992 993fs_inst * 994fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 995{ 996 int mlen; 997 int base_mrf = 1; 998 bool simd16 = false; 999 fs_reg orig_dst; 1000 1001 /* g0 header. */ 1002 mlen = 1; 1003 1004 if (ir->shadow_comparitor) { 1005 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1006 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1007 coordinate)); 1008 coordinate.reg_offset++; 1009 } 1010 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1011 mlen += 3; 1012 1013 if (ir->op == ir_tex) { 1014 /* There's no plain shadow compare message, so we use shadow 1015 * compare with a bias of 0.0. 1016 */ 1017 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1018 fs_reg(0.0f))); 1019 mlen++; 1020 } else if (ir->op == ir_txb) { 1021 ir->lod_info.bias->accept(this); 1022 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1023 this->result)); 1024 mlen++; 1025 } else { 1026 assert(ir->op == ir_txl); 1027 ir->lod_info.lod->accept(this); 1028 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1029 this->result)); 1030 mlen++; 1031 } 1032 1033 ir->shadow_comparitor->accept(this); 1034 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1035 mlen++; 1036 } else if (ir->op == ir_tex) { 1037 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1038 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1039 coordinate)); 1040 coordinate.reg_offset++; 1041 } 1042 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1043 mlen += 3; 1044 } else { 1045 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 1046 * instructions. 
We'll need to do SIMD16 here. 1047 */ 1048 assert(ir->op == ir_txb || ir->op == ir_txl); 1049 1050 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1051 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), 1052 coordinate)); 1053 coordinate.reg_offset++; 1054 } 1055 1056 /* lod/bias appears after u/v/r. */ 1057 mlen += 6; 1058 1059 if (ir->op == ir_txb) { 1060 ir->lod_info.bias->accept(this); 1061 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1062 this->result)); 1063 mlen++; 1064 } else { 1065 ir->lod_info.lod->accept(this); 1066 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1067 this->result)); 1068 mlen++; 1069 } 1070 1071 /* The unused upper half. */ 1072 mlen++; 1073 1074 /* Now, since we're doing simd16, the return is 2 interleaved 1075 * vec4s where the odd-indexed ones are junk. We'll need to move 1076 * this weirdness around to the expected layout. 1077 */ 1078 simd16 = true; 1079 orig_dst = dst; 1080 dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 1081 2)); 1082 dst.type = BRW_REGISTER_TYPE_F; 1083 } 1084 1085 fs_inst *inst = NULL; 1086 switch (ir->op) { 1087 case ir_tex: 1088 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1089 break; 1090 case ir_txb: 1091 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1092 break; 1093 case ir_txl: 1094 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1095 break; 1096 case ir_txd: 1097 case ir_txf: 1098 assert(!"GLSL 1.30 features unsupported"); 1099 break; 1100 } 1101 inst->base_mrf = base_mrf; 1102 inst->mlen = mlen; 1103 1104 if (simd16) { 1105 for (int i = 0; i < 4; i++) { 1106 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst)); 1107 orig_dst.reg_offset++; 1108 dst.reg_offset += 2; 1109 } 1110 } 1111 1112 return inst; 1113} 1114 1115fs_inst * 1116fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1117{ 1118 /* gen5's SIMD8 sampler has slots for u, v, r, array index, then 1119 * optional parameters like shadow comparitor or LOD bias. 
If 1120 * optional parameters aren't present, those base slots are 1121 * optional and don't need to be included in the message. 1122 * 1123 * We don't fill in the unnecessary slots regardless, which may 1124 * look surprising in the disassembly. 1125 */ 1126 int mlen = 1; /* g0 header always present. */ 1127 int base_mrf = 1; 1128 1129 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1130 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1131 coordinate)); 1132 coordinate.reg_offset++; 1133 } 1134 mlen += ir->coordinate->type->vector_elements; 1135 1136 if (ir->shadow_comparitor) { 1137 mlen = MAX2(mlen, 5); 1138 1139 ir->shadow_comparitor->accept(this); 1140 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1141 mlen++; 1142 } 1143 1144 fs_inst *inst = NULL; 1145 switch (ir->op) { 1146 case ir_tex: 1147 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1148 break; 1149 case ir_txb: 1150 ir->lod_info.bias->accept(this); 1151 mlen = MAX2(mlen, 5); 1152 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1153 mlen++; 1154 1155 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1156 break; 1157 case ir_txl: 1158 ir->lod_info.lod->accept(this); 1159 mlen = MAX2(mlen, 5); 1160 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1161 mlen++; 1162 1163 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1164 break; 1165 case ir_txd: 1166 case ir_txf: 1167 assert(!"GLSL 1.30 features unsupported"); 1168 break; 1169 } 1170 inst->base_mrf = base_mrf; 1171 inst->mlen = mlen; 1172 1173 return inst; 1174} 1175 1176void 1177fs_visitor::visit(ir_texture *ir) 1178{ 1179 int sampler; 1180 fs_inst *inst = NULL; 1181 1182 ir->coordinate->accept(this); 1183 fs_reg coordinate = this->result; 1184 1185 /* Should be lowered by do_lower_texture_projection */ 1186 assert(!ir->projector); 1187 1188 sampler = _mesa_get_sampler_uniform_value(ir->sampler, 1189 ctx->Shader.CurrentProgram, 1190 
&brw->fragment_program->Base); 1191 sampler = c->fp->program.Base.SamplerUnits[sampler]; 1192 1193 /* The 965 requires the EU to do the normalization of GL rectangle 1194 * texture coordinates. We use the program parameter state 1195 * tracking to get the scaling factor. 1196 */ 1197 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1198 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1199 int tokens[STATE_LENGTH] = { 1200 STATE_INTERNAL, 1201 STATE_TEXRECT_SCALE, 1202 sampler, 1203 0, 1204 0 1205 }; 1206 1207 fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1208 fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1209 GLuint index = _mesa_add_state_reference(params, 1210 (gl_state_index *)tokens); 1211 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 1212 1213 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0]; 1214 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1]; 1215 1216 fs_reg dst = fs_reg(this, ir->coordinate->type); 1217 fs_reg src = coordinate; 1218 coordinate = dst; 1219 1220 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x)); 1221 dst.reg_offset++; 1222 src.reg_offset++; 1223 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y)); 1224 } 1225 1226 /* Writemasking doesn't eliminate channels on SIMD8 texture 1227 * samples, so don't worry about them. 
 */
   /* Temporary vec4 to receive the raw sample result; a swizzle fixup pass
    * below may copy it into a second vec4.
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   /* gen4 and gen5+ use different sampler message layouts. */
   if (intel->gen < 5) {
      inst = emit_texture_gen4(ir, dst, coordinate);
   } else {
      inst = emit_texture_gen5(ir, dst, coordinate);
   }

   inst->sampler = sampler;

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   /* Apply the per-sampler texture swizzle from the WM key, if any, by
    * copying channels (or 0.0/1.0 immediates for SWIZZLE_ZERO/ONE) into a
    * fresh vec4 and making that the result.
    */
   if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
      fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
         int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
         fs_reg l = swizzle_dst;
         l.reg_offset += i;

         if (swiz == SWIZZLE_ZERO) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
         } else if (swiz == SWIZZLE_ONE) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
         } else {
            fs_reg r = dst;
            r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
            emit(fs_inst(BRW_OPCODE_MOV, l, r));
         }
      }
      this->result = swizzle_dst;
   }
}

void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   /* Single-channel swizzle: just advance the result register offset to
    * point at the selected channel; no copy needed.
    */
   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   /* Multi-channel swizzle: copy each selected source channel into
    * consecutive channels of a new temporary.
    */
   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

void
fs_visitor::visit(ir_discard *ir)
{
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */

   /* Build the not-discarded channel mask, then AND it into the pixel
    * enables (see generate_discard_not/generate_discard_and for the
    * per-generation codegen).
    */
   emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null_d));
   emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null_d, temp));
   kill_emitted = true;
}

void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   /* Load each scalar channel of the constant with an immediate MOV. */
   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_INT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_BOOL:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

/* Emit instructions that leave the flag register set according to the
 * boolean value of \p ir, so a following predicated instruction can
 * branch on it.  Comparisons are folded directly into a CMP with the
 * appropriate conditional mod where possible.
 */
void
fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;

      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* Booleans are stored as 0/1 integers; AND with 1 and test for
          * zero to invert.
          */
         inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(fs_inst(BRW_OPCODE_OR, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case
ir_unop_f2b: 1382 if (intel->gen >= 6) { 1383 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, 1384 op[0], fs_reg(0.0f))); 1385 } else { 1386 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); 1387 } 1388 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1389 break; 1390 1391 case ir_unop_i2b: 1392 if (intel->gen >= 6) { 1393 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0))); 1394 } else { 1395 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); 1396 } 1397 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1398 break; 1399 1400 case ir_binop_greater: 1401 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1402 inst->conditional_mod = BRW_CONDITIONAL_G; 1403 break; 1404 case ir_binop_gequal: 1405 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1406 inst->conditional_mod = BRW_CONDITIONAL_GE; 1407 break; 1408 case ir_binop_less: 1409 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1410 inst->conditional_mod = BRW_CONDITIONAL_L; 1411 break; 1412 case ir_binop_lequal: 1413 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1414 inst->conditional_mod = BRW_CONDITIONAL_LE; 1415 break; 1416 case ir_binop_equal: 1417 case ir_binop_all_equal: 1418 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1419 inst->conditional_mod = BRW_CONDITIONAL_Z; 1420 break; 1421 case ir_binop_nequal: 1422 case ir_binop_any_nequal: 1423 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); 1424 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1425 break; 1426 default: 1427 assert(!"not reached"); 1428 this->fail = true; 1429 break; 1430 } 1431 return; 1432 } 1433 1434 ir->accept(this); 1435 1436 if (intel->gen >= 6) { 1437 fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, 1438 this->result, fs_reg(1))); 1439 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1440 } else { 1441 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, this->result)); 1442 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1443 } 
1444} 1445 1446/** 1447 * Emit a gen6 IF statement with the comparison folded into the IF 1448 * instruction. 1449 */ 1450void 1451fs_visitor::emit_if_gen6(ir_if *ir) 1452{ 1453 ir_expression *expr = ir->condition->as_expression(); 1454 1455 if (expr) { 1456 fs_reg op[2]; 1457 fs_inst *inst; 1458 fs_reg temp; 1459 1460 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1461 assert(expr->operands[i]->type->is_scalar()); 1462 1463 expr->operands[i]->accept(this); 1464 op[i] = this->result; 1465 } 1466 1467 switch (expr->operation) { 1468 case ir_unop_logic_not: 1469 inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1))); 1470 inst->conditional_mod = BRW_CONDITIONAL_Z; 1471 return; 1472 1473 case ir_binop_logic_xor: 1474 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1475 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1476 return; 1477 1478 case ir_binop_logic_or: 1479 temp = fs_reg(this, glsl_type::bool_type); 1480 emit(fs_inst(BRW_OPCODE_OR, temp, op[0], op[1])); 1481 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); 1482 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1483 return; 1484 1485 case ir_binop_logic_and: 1486 temp = fs_reg(this, glsl_type::bool_type); 1487 emit(fs_inst(BRW_OPCODE_AND, temp, op[0], op[1])); 1488 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); 1489 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1490 return; 1491 1492 case ir_unop_f2b: 1493 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0))); 1494 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1495 return; 1496 1497 case ir_unop_i2b: 1498 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); 1499 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1500 return; 1501 1502 case ir_binop_greater: 1503 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1504 inst->conditional_mod = BRW_CONDITIONAL_G; 1505 return; 1506 case ir_binop_gequal: 1507 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1508 
inst->conditional_mod = BRW_CONDITIONAL_GE; 1509 return; 1510 case ir_binop_less: 1511 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1512 inst->conditional_mod = BRW_CONDITIONAL_L; 1513 return; 1514 case ir_binop_lequal: 1515 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1516 inst->conditional_mod = BRW_CONDITIONAL_LE; 1517 return; 1518 case ir_binop_equal: 1519 case ir_binop_all_equal: 1520 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1521 inst->conditional_mod = BRW_CONDITIONAL_Z; 1522 return; 1523 case ir_binop_nequal: 1524 case ir_binop_any_nequal: 1525 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); 1526 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1527 return; 1528 default: 1529 assert(!"not reached"); 1530 inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); 1531 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1532 this->fail = true; 1533 return; 1534 } 1535 return; 1536 } 1537 1538 ir->condition->accept(this); 1539 1540 fs_inst *inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0))); 1541 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1542} 1543 1544void 1545fs_visitor::visit(ir_if *ir) 1546{ 1547 fs_inst *inst; 1548 1549 /* Don't point the annotation at the if statement, because then it plus 1550 * the then and else blocks get printed. 
1551 */ 1552 this->base_ir = ir->condition; 1553 1554 if (intel->gen >= 6) { 1555 emit_if_gen6(ir); 1556 } else { 1557 emit_bool_to_cond_code(ir->condition); 1558 1559 inst = emit(fs_inst(BRW_OPCODE_IF)); 1560 inst->predicated = true; 1561 } 1562 1563 foreach_iter(exec_list_iterator, iter, ir->then_instructions) { 1564 ir_instruction *ir = (ir_instruction *)iter.get(); 1565 this->base_ir = ir; 1566 1567 ir->accept(this); 1568 } 1569 1570 if (!ir->else_instructions.is_empty()) { 1571 emit(fs_inst(BRW_OPCODE_ELSE)); 1572 1573 foreach_iter(exec_list_iterator, iter, ir->else_instructions) { 1574 ir_instruction *ir = (ir_instruction *)iter.get(); 1575 this->base_ir = ir; 1576 1577 ir->accept(this); 1578 } 1579 } 1580 1581 emit(fs_inst(BRW_OPCODE_ENDIF)); 1582} 1583 1584void 1585fs_visitor::visit(ir_loop *ir) 1586{ 1587 fs_reg counter = reg_undef; 1588 1589 if (ir->counter) { 1590 this->base_ir = ir->counter; 1591 ir->counter->accept(this); 1592 counter = *(variable_storage(ir->counter)); 1593 1594 if (ir->from) { 1595 this->base_ir = ir->from; 1596 ir->from->accept(this); 1597 1598 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result)); 1599 } 1600 } 1601 1602 emit(fs_inst(BRW_OPCODE_DO)); 1603 1604 if (ir->to) { 1605 this->base_ir = ir->to; 1606 ir->to->accept(this); 1607 1608 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, 1609 counter, this->result)); 1610 switch (ir->cmp) { 1611 case ir_binop_equal: 1612 inst->conditional_mod = BRW_CONDITIONAL_Z; 1613 break; 1614 case ir_binop_nequal: 1615 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1616 break; 1617 case ir_binop_gequal: 1618 inst->conditional_mod = BRW_CONDITIONAL_GE; 1619 break; 1620 case ir_binop_lequal: 1621 inst->conditional_mod = BRW_CONDITIONAL_LE; 1622 break; 1623 case ir_binop_greater: 1624 inst->conditional_mod = BRW_CONDITIONAL_G; 1625 break; 1626 case ir_binop_less: 1627 inst->conditional_mod = BRW_CONDITIONAL_L; 1628 break; 1629 default: 1630 assert(!"not reached: unknown loop condition"); 1631 
         this->fail = true;
         break;
      }

      /* Conditionally break out of the loop when the comparison above
       * sets the flag.
       */
      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   /* Step the loop counter, if this loop has one. */
   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   /* Calls should have been inlined away before reaching this backend. */
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   /* Signatures are only visited through ir_function above. */
   assert(!"not reached");
   (void)ir;
}

/* Append a copy of \p inst to the instruction stream, tagging it with the
 * current annotation string and source IR node for later debug dumps.
 * Returns the heap-allocated copy so callers can set extra fields
 * (mlen, base_mrf, conditional_mod, ...).
 */
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color: MRF 2..5 hold the R/G/B/A payload of the
    * framebuffer write.
    */
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 2),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 3),
                fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 4),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 5),
                fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
                        fs_reg(0),
                        fs_reg(0)));
   write->base_mrf = 0;
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   /* Two channels of barycentric setup data pack into each register;
    * even channels start at subreg 0, odd channels at subreg 4.
    */
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs.
*/ 1765void 1766fs_visitor::emit_interpolation_setup_gen4() 1767{ 1768 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1769 1770 this->current_annotation = "compute pixel centers"; 1771 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1772 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1773 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1774 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1775 emit(fs_inst(BRW_OPCODE_ADD, 1776 this->pixel_x, 1777 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1778 fs_reg(brw_imm_v(0x10101010)))); 1779 emit(fs_inst(BRW_OPCODE_ADD, 1780 this->pixel_y, 1781 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1782 fs_reg(brw_imm_v(0x11001100)))); 1783 1784 this->current_annotation = "compute pixel deltas from v0"; 1785 if (brw->has_pln) { 1786 this->delta_x = fs_reg(this, glsl_type::vec2_type); 1787 this->delta_y = this->delta_x; 1788 this->delta_y.reg_offset++; 1789 } else { 1790 this->delta_x = fs_reg(this, glsl_type::float_type); 1791 this->delta_y = fs_reg(this, glsl_type::float_type); 1792 } 1793 emit(fs_inst(BRW_OPCODE_ADD, 1794 this->delta_x, 1795 this->pixel_x, 1796 fs_reg(negate(brw_vec1_grf(1, 0))))); 1797 emit(fs_inst(BRW_OPCODE_ADD, 1798 this->delta_y, 1799 this->pixel_y, 1800 fs_reg(negate(brw_vec1_grf(1, 1))))); 1801 1802 this->current_annotation = "compute pos.w and 1/pos.w"; 1803 /* Compute wpos.w. It's always in our setup, since it's needed to 1804 * interpolate the other attributes. 1805 */ 1806 this->wpos_w = fs_reg(this, glsl_type::float_type); 1807 emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, 1808 interp_reg(FRAG_ATTRIB_WPOS, 3))); 1809 /* Compute the pixel 1/W value from wpos.w. */ 1810 this->pixel_w = fs_reg(this, glsl_type::float_type); 1811 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 1812 this->current_annotation = NULL; 1813} 1814 1815/** Emits the interpolation for the varying inputs. 
*/ 1816void 1817fs_visitor::emit_interpolation_setup_gen6() 1818{ 1819 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1820 1821 /* If the pixel centers end up used, the setup is the same as for gen4. */ 1822 this->current_annotation = "compute pixel centers"; 1823 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); 1824 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); 1825 int_pixel_x.type = BRW_REGISTER_TYPE_UW; 1826 int_pixel_y.type = BRW_REGISTER_TYPE_UW; 1827 emit(fs_inst(BRW_OPCODE_ADD, 1828 int_pixel_x, 1829 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1830 fs_reg(brw_imm_v(0x10101010)))); 1831 emit(fs_inst(BRW_OPCODE_ADD, 1832 int_pixel_y, 1833 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1834 fs_reg(brw_imm_v(0x11001100)))); 1835 1836 /* As of gen6, we can no longer mix float and int sources. We have 1837 * to turn the integer pixel centers into floats for their actual 1838 * use. 1839 */ 1840 this->pixel_x = fs_reg(this, glsl_type::float_type); 1841 this->pixel_y = fs_reg(this, glsl_type::float_type); 1842 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x)); 1843 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); 1844 1845 this->current_annotation = "compute 1/pos.w"; 1846 this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); 1847 this->pixel_w = fs_reg(this, glsl_type::float_type); 1848 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 1849 1850 this->delta_x = fs_reg(brw_vec8_grf(2, 0)); 1851 this->delta_y = fs_reg(brw_vec8_grf(3, 0)); 1852 1853 this->current_annotation = NULL; 1854} 1855 1856void 1857fs_visitor::emit_fb_writes() 1858{ 1859 this->current_annotation = "FB write header"; 1860 GLboolean header_present = GL_TRUE; 1861 int nr = 0; 1862 1863 if (intel->gen >= 6 && 1864 !this->kill_emitted && 1865 c->key.nr_color_regions == 1) { 1866 header_present = false; 1867 } 1868 1869 if (header_present) { 1870 /* m0, m1 header */ 1871 nr += 2; 1872 } 1873 1874 if (c->key.aa_dest_stencil_reg) { 
1875 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1876 fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); 1877 } 1878 1879 /* Reserve space for color. It'll be filled in per MRT below. */ 1880 int color_mrf = nr; 1881 nr += 4; 1882 1883 if (c->key.source_depth_to_render_target) { 1884 if (c->key.computes_depth) { 1885 /* Hand over gl_FragDepth. */ 1886 assert(this->frag_depth); 1887 fs_reg depth = *(variable_storage(this->frag_depth)); 1888 1889 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 1890 } else { 1891 /* Pass through the payload depth. */ 1892 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1893 fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); 1894 } 1895 } 1896 1897 if (c->key.dest_depth_reg) { 1898 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1899 fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); 1900 } 1901 1902 fs_reg color = reg_undef; 1903 if (this->frag_color) 1904 color = *(variable_storage(this->frag_color)); 1905 else if (this->frag_data) 1906 color = *(variable_storage(this->frag_data)); 1907 1908 for (int target = 0; target < c->key.nr_color_regions; target++) { 1909 this->current_annotation = talloc_asprintf(this->mem_ctx, 1910 "FB write target %d", 1911 target); 1912 if (this->frag_color || this->frag_data) { 1913 for (int i = 0; i < 4; i++) { 1914 emit(fs_inst(BRW_OPCODE_MOV, 1915 fs_reg(MRF, color_mrf + i), 1916 color)); 1917 color.reg_offset++; 1918 } 1919 } 1920 1921 if (this->frag_color) 1922 color.reg_offset -= 4; 1923 1924 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1925 reg_undef, reg_undef)); 1926 inst->target = target; 1927 inst->base_mrf = 0; 1928 inst->mlen = nr; 1929 if (target == c->key.nr_color_regions - 1) 1930 inst->eot = true; 1931 inst->header_present = header_present; 1932 } 1933 1934 if (c->key.nr_color_regions == 0) { 1935 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1936 reg_undef, reg_undef)); 1937 inst->base_mrf = 0; 1938 inst->mlen = nr; 1939 inst->eot = true; 1940 inst->header_present 
= header_present; 1941 } 1942 1943 this->current_annotation = NULL; 1944} 1945 1946void 1947fs_visitor::generate_fb_write(fs_inst *inst) 1948{ 1949 GLboolean eot = inst->eot; 1950 struct brw_reg implied_header; 1951 1952 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 1953 * move, here's g1. 1954 */ 1955 brw_push_insn_state(p); 1956 brw_set_mask_control(p, BRW_MASK_DISABLE); 1957 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1958 1959 if (inst->header_present) { 1960 if (intel->gen >= 6) { 1961 brw_MOV(p, 1962 brw_message_reg(inst->base_mrf), 1963 brw_vec8_grf(0, 0)); 1964 1965 if (inst->target > 0) { 1966 /* Set the render target index for choosing BLEND_STATE. */ 1967 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), 1968 BRW_REGISTER_TYPE_UD), 1969 brw_imm_ud(inst->target)); 1970 } 1971 1972 /* Clear viewport index, render target array index. */ 1973 brw_AND(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 0), 1974 BRW_REGISTER_TYPE_UD), 1975 retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), 1976 brw_imm_ud(0xf7ff)); 1977 1978 implied_header = brw_null_reg(); 1979 } else { 1980 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); 1981 } 1982 1983 brw_MOV(p, 1984 brw_message_reg(inst->base_mrf + 1), 1985 brw_vec8_grf(1, 0)); 1986 } else { 1987 implied_header = brw_null_reg(); 1988 } 1989 1990 brw_pop_insn_state(p); 1991 1992 brw_fb_WRITE(p, 1993 8, /* dispatch_width */ 1994 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 1995 inst->base_mrf, 1996 implied_header, 1997 inst->target, 1998 inst->mlen, 1999 0, 2000 eot); 2001} 2002 2003void 2004fs_visitor::generate_linterp(fs_inst *inst, 2005 struct brw_reg dst, struct brw_reg *src) 2006{ 2007 struct brw_reg delta_x = src[0]; 2008 struct brw_reg delta_y = src[1]; 2009 struct brw_reg interp = src[2]; 2010 2011 if (brw->has_pln && 2012 delta_y.nr == delta_x.nr + 1 && 2013 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { 2014 brw_PLN(p, dst, interp, delta_x); 
   } else {
      /* No usable PLN: fall back to the two-instruction LINE+MAC
       * sequence for linear interpolation.
       */
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

/* Generate a hardware extended-math instruction for one of our virtual
 * math opcodes (RCP/RSQ/SQRT/EXP2/LOG2/POW/SIN/COS).
 */
void
fs_visitor::generate_math(fs_inst *inst,
                          struct brw_reg dst, struct brw_reg *src)
{
   int op;

   /* Map the virtual opcode to the hardware math function encoding. */
   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   if (intel->gen >= 6) {
      /* gen6+ math reads GRF sources directly; no message registers are
       * involved (hence mlen == 0, and 0 passed as the mrf argument).
       */
      assert(inst->mlen == 0);

      if (inst->opcode == FS_OPCODE_POW) {
         brw_math2(p, dst, op, src[0], src[1]);
      } else {
         brw_math(p, dst,
                  op,
                  inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                  BRW_MATH_SATURATE_NONE,
                  0, src[0],
                  BRW_MATH_DATA_VECTOR,
                  BRW_MATH_PRECISION_FULL);
      }
   } else {
      /* Pre-gen6 math is a send-like operation through the MRF. */
      assert(inst->mlen >= 1);

      brw_math(p, dst,
               op,
               inst->saturate ? BRW_MATH_SATURATE_SATURATE :
               BRW_MATH_SATURATE_NONE,
               inst->base_mrf, src[0],
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);
   }
}

/* Generate the sampler SEND for a texturing virtual opcode, choosing the
 * per-generation message type and SIMD mode.
 */
void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

   if (intel->gen >= 5) {
      /* gen5+ encodes the message type explicitly in the descriptor. */
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
         }
         break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         /* Note that G45 and older determines shadow compare and dispatch width
          * from message length for most messages.
2114 */ 2115 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 2116 if (inst->shadow_compare) { 2117 assert(inst->mlen == 6); 2118 } else { 2119 assert(inst->mlen <= 4); 2120 } 2121 break; 2122 case FS_OPCODE_TXB: 2123 if (inst->shadow_compare) { 2124 assert(inst->mlen == 6); 2125 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 2126 } else { 2127 assert(inst->mlen == 9); 2128 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 2129 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; 2130 } 2131 break; 2132 } 2133 } 2134 assert(msg_type != -1); 2135 2136 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { 2137 rlen = 8; 2138 dst = vec16(dst); 2139 } 2140 2141 brw_SAMPLE(p, 2142 retype(dst, BRW_REGISTER_TYPE_UW), 2143 inst->base_mrf, 2144 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 2145 SURF_INDEX_TEXTURE(inst->sampler), 2146 inst->sampler, 2147 WRITEMASK_XYZW, 2148 msg_type, 2149 rlen, 2150 inst->mlen, 2151 0, 2152 1, 2153 simd_mode); 2154} 2155 2156 2157/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 2158 * looking like: 2159 * 2160 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 2161 * 2162 * and we're trying to produce: 2163 * 2164 * DDX DDY 2165 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 2166 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 2167 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 2168 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 2169 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 2170 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 2171 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 2172 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 2173 * 2174 * and add another set of two more subspans if in 16-pixel dispatch mode. 2175 * 2176 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 2177 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 2178 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 2179 * between each other. 
We could probably do it like ddx and swizzle the right 2180 * order later, but bail for now and just produce 2181 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) 2182 */ 2183void 2184fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2185{ 2186 struct brw_reg src0 = brw_reg(src.file, src.nr, 1, 2187 BRW_REGISTER_TYPE_F, 2188 BRW_VERTICAL_STRIDE_2, 2189 BRW_WIDTH_2, 2190 BRW_HORIZONTAL_STRIDE_0, 2191 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2192 struct brw_reg src1 = brw_reg(src.file, src.nr, 0, 2193 BRW_REGISTER_TYPE_F, 2194 BRW_VERTICAL_STRIDE_2, 2195 BRW_WIDTH_2, 2196 BRW_HORIZONTAL_STRIDE_0, 2197 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2198 brw_ADD(p, dst, src0, negate(src1)); 2199} 2200 2201void 2202fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2203{ 2204 struct brw_reg src0 = brw_reg(src.file, src.nr, 0, 2205 BRW_REGISTER_TYPE_F, 2206 BRW_VERTICAL_STRIDE_4, 2207 BRW_WIDTH_4, 2208 BRW_HORIZONTAL_STRIDE_0, 2209 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2210 struct brw_reg src1 = brw_reg(src.file, src.nr, 2, 2211 BRW_REGISTER_TYPE_F, 2212 BRW_VERTICAL_STRIDE_4, 2213 BRW_WIDTH_4, 2214 BRW_HORIZONTAL_STRIDE_0, 2215 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2216 brw_ADD(p, dst, src0, negate(src1)); 2217} 2218 2219void 2220fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask) 2221{ 2222 if (intel->gen >= 6) { 2223 /* Gen6 no longer has the mask reg for us to just read the 2224 * active channels from. However, cmp updates just the channels 2225 * of the flag reg that are enabled, so we can get at the 2226 * channel enables that way. In this step, make a reg of ones 2227 * we'll compare to. 
 */
      brw_MOV(p, mask, brw_imm_ud(1));
   } else {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
      brw_pop_insn_state(p);
   }
}

/* Emits code that ANDs the discard condition into the pixel mask.
 *
 * On gen6+ this goes through the flag register and the per-channel
 * enables in g1.7; pre-gen6 it ANDs directly into the pixel mask in
 * g0.0.  Mask-disabled MOV/AND are used so inactive channels keep
 * their bits (inactive channels must stay "undiscarded").
 */
void
fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
{
   if (intel->gen >= 6) {
      struct brw_reg f0 = brw_flag_reg();
      struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, f0, brw_imm_uw(0xffff)); /* inactive channels undiscarded */
      brw_pop_insn_state(p);

      brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
              BRW_CONDITIONAL_Z, mask, brw_imm_ud(0)); /* active channels fail test */
      /* Undo CMP's whacking of predication*/
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_AND(p, g1, f0, g1);
      brw_pop_insn_state(p);
   } else {
      struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

      mask = brw_uw1_reg(mask.file, mask.nr, 0);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_AND(p, g0, mask, g0);
      brw_pop_insn_state(p);
   }
}

/* Writes one register's worth of data out to the scratch buffer at
 * inst->offset, via an OWord block write message staged in the MRFs
 * starting at inst->base_mrf.
 */
void
fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
{
   assert(inst->mlen != 0);

   brw_MOV(p,
	   retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
	   retype(src, BRW_REGISTER_TYPE_UD));
   brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
				 inst->offset);
}

/* Reads one register's worth of data back from the scratch buffer at
 * inst->offset into dst, with MOV-to-null workarounds for the gen4
 * send-destination errata (see comments below).
 */
void
fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   /* Clear any post destination dependencies that would be ignored by
    * the block read.  See the B-Spec for pre-gen5 send instruction.
    *
    * This could use a better solution, since texture sampling and
    * math reads could potentially run into it as well -- anywhere
    * that we have a SEND with a destination that is a register that
    * was written but not read within the last N instructions (what's
    * N?  unsure).  This is rare because of dead code elimination, but
    * not impossible.
    */
   if (intel->gen == 4 && !intel->is_g4x)
      brw_MOV(p, brw_null_reg(), dst);

   brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
				inst->offset);

   if (intel->gen == 4 && !intel->is_g4x) {
      /* gen4 errata: destination from a send can't be used as a
       * destination until it's been read.  Just read it so we don't
       * have to worry.
       */
      brw_MOV(p, brw_null_reg(), dst);
   }
}


/* Loads a block of uniform data from the pull constant buffer
 * (SURF_INDEX_FRAG_CONST_BUFFER) at inst->offset into dst, with the
 * same gen4 send-destination errata workarounds as generate_unspill().
 */
void
fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
{
   assert(inst->mlen != 0);

   /* Clear any post destination dependencies that would be ignored by
    * the block read.  See the B-Spec for pre-gen5 send instruction.
    *
    * This could use a better solution, since texture sampling and
    * math reads could potentially run into it as well -- anywhere
    * that we have a SEND with a destination that is a register that
    * was written but not read within the last N instructions (what's
    * N?  unsure).  This is rare because of dead code elimination, but
    * not impossible.
    */
   if (intel->gen == 4 && !intel->is_g4x)
      brw_MOV(p, brw_null_reg(), dst);

   brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
			inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);

   if (intel->gen == 4 && !intel->is_g4x) {
      /* gen4 errata: destination from a send can't be used as a
       * destination until it's been read.  Just read it so we don't
       * have to worry.
       */
      brw_MOV(p, brw_null_reg(), dst);
   }
}

/* Rewrites all UNIFORM-file sources to the fixed hardware registers
 * where the CURBE (push constant) payload lands, and records the CURBE
 * layout in prog_data.  Eight constants fit per register.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
	 if (inst->src[i].file == UNIFORM) {
	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
						  constant_nr / 8,
						  constant_nr % 8);

	    inst->src[i].file = FIXED_HW_REG;
	    inst->src[i].fixed_hw_reg = brw_reg;
	 }
      }
   }
}

/* Decides which slot of the incoming URB setup data each read fragment
 * attribute occupies (urb_setup[attr] = slot, or -1 if unused), and
 * records how many payload registers of setup data that implies.
 */
void
fs_visitor::calculate_urb_setup()
{
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      urb_setup[i] = -1;
   }

   int urb_next = 0;
   /* Figure out where each of the incoming setup attributes lands. */
   if (intel->gen >= 6) {
      for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
	 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) {
	    urb_setup[i] = urb_next++;
	 }
      }
   } else {
      /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
      for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
	 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
	    int fp_index;

	    /* Map vertex result indices onto fragment attribute indices;
	     * outputs between TEX7 and VAR0 have no FS input equivalent.
	     */
	    if (i >= VERT_RESULT_VAR0)
	       fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
	    else if (i <= VERT_RESULT_TEX7)
	       fp_index = i;
	    else
	       fp_index = -1;

	    if (fp_index >= 0)
	       urb_setup[fp_index] = urb_next++;
	 }
      }
   }

   /* Each attribute is 4 setup channels, each of which is half a reg. */
   c->prog_data.urb_read_length = urb_next * 2;
}

/* Patches LINTERP instructions' interpolation-coefficient source with
 * the final hardware register of its setup data, now that the CURBE
 * size (and hence where the URB setup lands) is known.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;

   /* Offset all the urb_setup[] index by the actual position of the
    * setup regs, now that the location of the constants has been chosen.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
	 continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      inst->src[2].fixed_hw_reg.nr += urb_start;
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}

/**
 * Split large virtual GRFs into separate components if we can.
 *
 * This is mostly duplicated with what brw_fs_vector_splitting does,
 * but that's really conservative because it's afraid of doing
 * splitting that doesn't result in real progress after the rest of
 * the optimization phases, which would cause infinite looping in
 * optimization.  We can do it once here, safely.  This also has the
 * opportunity to split interpolated values, or maybe even uniforms,
 * which we don't have at the IR level.
 *
 * We want to split, because virtual GRFs are what we register
 * allocate and spill (due to contiguousness requirements for some
 * instructions), and they're what we naturally generate in the
 * codegen process, but most virtual GRFs don't actually need to be
 * contiguous sets of GRFs.  If we split, we'll end up with reduced
 * live intervals and better dead code elimination and coalescing.
 */
void
fs_visitor::split_virtual_grfs()
{
   int num_vars = this->virtual_grf_next;
   bool split_grf[num_vars];
   int new_virtual_grf[num_vars];

   /* Try to split anything > 0 sized. */
   for (int i = 0; i < num_vars; i++) {
      if (this->virtual_grf_sizes[i] != 1)
	 split_grf[i] = true;
      else
	 split_grf[i] = false;
   }

   if (brw->has_pln) {
      /* PLN opcodes rely on the delta_xy being contiguous. */
      split_grf[this->delta_x.reg] = false;
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      /* Texturing produces 4 contiguous registers, so no splitting. */
      if ((inst->opcode == FS_OPCODE_TEX ||
	   inst->opcode == FS_OPCODE_TXB ||
	   inst->opcode == FS_OPCODE_TXL) &&
	  inst->dst.file == GRF) {
	 split_grf[inst->dst.reg] = false;
      }
   }

   /* Allocate new space for split regs.  Note that the virtual
    * numbers will be contiguous.
    */
   for (int i = 0; i < num_vars; i++) {
      if (split_grf[i]) {
	 new_virtual_grf[i] = virtual_grf_alloc(1);
	 /* The remaining components follow immediately after the first;
	  * the assert relies on virtual_grf_alloc handing out sequential
	  * register numbers.
	  */
	 for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
	    int reg = virtual_grf_alloc(1);
	    assert(reg == new_virtual_grf[i] + j - 1);
	 }
	 this->virtual_grf_sizes[i] = 1;
      }
   }

   /* Rewrite accesses: reg_offset 0 keeps the old register number;
    * offsets >= 1 move to the newly allocated single-size registers.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->dst.file == GRF &&
	  split_grf[inst->dst.reg] &&
	  inst->dst.reg_offset != 0) {
	 inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
			  inst->dst.reg_offset - 1);
	 inst->dst.reg_offset = 0;
      }
      for (int i = 0; i < 3; i++) {
	 if (inst->src[i].file == GRF &&
	     split_grf[inst->src[i].reg] &&
	     inst->src[i].reg_offset != 0) {
	    inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
				inst->src[i].reg_offset - 1);
	    inst->src[i].reg_offset = 0;
	 }
      }
   }
}

/**
 * Choose accesses from the UNIFORM file to demote to using the pull
 * constant buffer.
2517 * 2518 * We allow a fragment shader to have more than the specified minimum 2519 * maximum number of fragment shader uniform components (64). If 2520 * there are too many of these, they'd fill up all of register space. 2521 * So, this will push some of them out to the pull constant buffer and 2522 * update the program to load them. 2523 */ 2524void 2525fs_visitor::setup_pull_constants() 2526{ 2527 /* Only allow 16 registers (128 uniform components) as push constants. */ 2528 unsigned int max_uniform_components = 16 * 8; 2529 if (c->prog_data.nr_params <= max_uniform_components) 2530 return; 2531 2532 /* Just demote the end of the list. We could probably do better 2533 * here, demoting things that are rarely used in the program first. 2534 */ 2535 int pull_uniform_base = max_uniform_components; 2536 int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; 2537 2538 foreach_iter(exec_list_iterator, iter, this->instructions) { 2539 fs_inst *inst = (fs_inst *)iter.get(); 2540 2541 for (int i = 0; i < 3; i++) { 2542 if (inst->src[i].file != UNIFORM) 2543 continue; 2544 2545 int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 2546 if (uniform_nr < pull_uniform_base) 2547 continue; 2548 2549 fs_reg dst = fs_reg(this, glsl_type::float_type); 2550 fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD, 2551 dst); 2552 pull->offset = ((uniform_nr - pull_uniform_base) * 4) & ~15; 2553 pull->ir = inst->ir; 2554 pull->annotation = inst->annotation; 2555 pull->base_mrf = 14; 2556 pull->mlen = 1; 2557 2558 inst->insert_before(pull); 2559 2560 inst->src[i].file = GRF; 2561 inst->src[i].reg = dst.reg; 2562 inst->src[i].reg_offset = 0; 2563 inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3; 2564 } 2565 } 2566 2567 for (int i = 0; i < pull_uniform_count; i++) { 2568 c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i]; 2569 } 2570 c->prog_data.nr_params -= pull_uniform_count; 2571 c->prog_data.nr_pull_params = 
pull_uniform_count; 2572} 2573 2574void 2575fs_visitor::calculate_live_intervals() 2576{ 2577 int num_vars = this->virtual_grf_next; 2578 int *def = talloc_array(mem_ctx, int, num_vars); 2579 int *use = talloc_array(mem_ctx, int, num_vars); 2580 int loop_depth = 0; 2581 int loop_start = 0; 2582 int bb_header_ip = 0; 2583 2584 for (int i = 0; i < num_vars; i++) { 2585 def[i] = 1 << 30; 2586 use[i] = -1; 2587 } 2588 2589 int ip = 0; 2590 foreach_iter(exec_list_iterator, iter, this->instructions) { 2591 fs_inst *inst = (fs_inst *)iter.get(); 2592 2593 if (inst->opcode == BRW_OPCODE_DO) { 2594 if (loop_depth++ == 0) 2595 loop_start = ip; 2596 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2597 loop_depth--; 2598 2599 if (loop_depth == 0) { 2600 /* Patches up the use of vars marked for being live across 2601 * the whole loop. 2602 */ 2603 for (int i = 0; i < num_vars; i++) { 2604 if (use[i] == loop_start) { 2605 use[i] = ip; 2606 } 2607 } 2608 } 2609 } else { 2610 for (unsigned int i = 0; i < 3; i++) { 2611 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2612 int reg = inst->src[i].reg; 2613 2614 if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && 2615 def[reg] >= bb_header_ip)) { 2616 use[reg] = ip; 2617 } else { 2618 def[reg] = MIN2(loop_start, def[reg]); 2619 use[reg] = loop_start; 2620 2621 /* Nobody else is going to go smash our start to 2622 * later in the loop now, because def[reg] now 2623 * points before the bb header. 2624 */ 2625 } 2626 } 2627 } 2628 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2629 int reg = inst->dst.reg; 2630 2631 if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && 2632 !inst->predicated)) { 2633 def[reg] = MIN2(def[reg], ip); 2634 } else { 2635 def[reg] = MIN2(def[reg], loop_start); 2636 } 2637 } 2638 } 2639 2640 ip++; 2641 2642 /* Set the basic block header IP. This is used for determining 2643 * if a complete def of single-register virtual GRF in a loop 2644 * dominates a use in the same basic block. 
It's a quick way to 2645 * reduce the live interval range of most register used in a 2646 * loop. 2647 */ 2648 if (inst->opcode == BRW_OPCODE_IF || 2649 inst->opcode == BRW_OPCODE_ELSE || 2650 inst->opcode == BRW_OPCODE_ENDIF || 2651 inst->opcode == BRW_OPCODE_DO || 2652 inst->opcode == BRW_OPCODE_WHILE || 2653 inst->opcode == BRW_OPCODE_BREAK || 2654 inst->opcode == BRW_OPCODE_CONTINUE) { 2655 bb_header_ip = ip; 2656 } 2657 } 2658 2659 talloc_free(this->virtual_grf_def); 2660 talloc_free(this->virtual_grf_use); 2661 this->virtual_grf_def = def; 2662 this->virtual_grf_use = use; 2663} 2664 2665/** 2666 * Attempts to move immediate constants into the immediate 2667 * constant slot of following instructions. 2668 * 2669 * Immediate constants are a bit tricky -- they have to be in the last 2670 * operand slot, you can't do abs/negate on them, 2671 */ 2672 2673bool 2674fs_visitor::propagate_constants() 2675{ 2676 bool progress = false; 2677 2678 foreach_iter(exec_list_iterator, iter, this->instructions) { 2679 fs_inst *inst = (fs_inst *)iter.get(); 2680 2681 if (inst->opcode != BRW_OPCODE_MOV || 2682 inst->predicated || 2683 inst->dst.file != GRF || inst->src[0].file != IMM || 2684 inst->dst.type != inst->src[0].type) 2685 continue; 2686 2687 /* Don't bother with cases where we should have had the 2688 * operation on the constant folded in GLSL already. 2689 */ 2690 if (inst->saturate) 2691 continue; 2692 2693 /* Found a move of a constant to a GRF. Find anything else using the GRF 2694 * before it's written, and replace it with the constant if we can. 
2695 */ 2696 exec_list_iterator scan_iter = iter; 2697 scan_iter.next(); 2698 for (; scan_iter.has_next(); scan_iter.next()) { 2699 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2700 2701 if (scan_inst->opcode == BRW_OPCODE_DO || 2702 scan_inst->opcode == BRW_OPCODE_WHILE || 2703 scan_inst->opcode == BRW_OPCODE_ELSE || 2704 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2705 break; 2706 } 2707 2708 for (int i = 2; i >= 0; i--) { 2709 if (scan_inst->src[i].file != GRF || 2710 scan_inst->src[i].reg != inst->dst.reg || 2711 scan_inst->src[i].reg_offset != inst->dst.reg_offset) 2712 continue; 2713 2714 /* Don't bother with cases where we should have had the 2715 * operation on the constant folded in GLSL already. 2716 */ 2717 if (scan_inst->src[i].negate || scan_inst->src[i].abs) 2718 continue; 2719 2720 switch (scan_inst->opcode) { 2721 case BRW_OPCODE_MOV: 2722 scan_inst->src[i] = inst->src[0]; 2723 progress = true; 2724 break; 2725 2726 case BRW_OPCODE_MUL: 2727 case BRW_OPCODE_ADD: 2728 if (i == 1) { 2729 scan_inst->src[i] = inst->src[0]; 2730 progress = true; 2731 } else if (i == 0 && scan_inst->src[1].file != IMM) { 2732 /* Fit this constant in by commuting the operands */ 2733 scan_inst->src[0] = scan_inst->src[1]; 2734 scan_inst->src[1] = inst->src[0]; 2735 } 2736 break; 2737 case BRW_OPCODE_CMP: 2738 if (i == 1) { 2739 scan_inst->src[i] = inst->src[0]; 2740 progress = true; 2741 } 2742 } 2743 } 2744 2745 if (scan_inst->dst.file == GRF && 2746 scan_inst->dst.reg == inst->dst.reg && 2747 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2748 scan_inst->opcode == FS_OPCODE_TEX)) { 2749 break; 2750 } 2751 } 2752 } 2753 2754 return progress; 2755} 2756/** 2757 * Must be called after calculate_live_intervales() to remove unused 2758 * writes to registers -- register allocation will fail otherwise 2759 * because something deffed but not used won't be considered to 2760 * interfere with other regs. 
 */
bool
fs_visitor::dead_code_eliminate()
{
   bool progress = false;
   int num_vars = this->virtual_grf_next;
   bool dead[num_vars];

   /* A register whose def is at or after its last use is never read. */
   for (int i = 0; i < num_vars; i++) {
      dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i];

      if (dead[i]) {
	 /* Mark off its interval so it won't interfere with anything. */
	 this->virtual_grf_def[i] = -1;
	 this->virtual_grf_use[i] = -1;
      }
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->dst.file == GRF && dead[inst->dst.reg]) {
	 inst->remove();
	 progress = true;
      }
   }

   return progress;
}

/* Removes GRF-to-GRF MOVs by rewriting later readers of the
 * destination to read the source directly, when neither register is
 * rewritten in between and no control flow intervenes.
 */
bool
fs_visitor::register_coalesce()
{
   bool progress = false;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != BRW_OPCODE_MOV ||
	  inst->predicated ||
	  inst->saturate ||
	  inst->dst.file != GRF || inst->src[0].file != GRF ||
	  inst->dst.type != inst->src[0].type)
	 continue;

      /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
       * them: check for no writes to either one until the exit of the
       * program.
       */
      bool interfered = false;
      exec_list_iterator scan_iter = iter;
      scan_iter.next();
      for (; scan_iter.has_next(); scan_iter.next()) {
	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();

	 if (scan_inst->opcode == BRW_OPCODE_DO ||
	     scan_inst->opcode == BRW_OPCODE_WHILE ||
	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
	    /* Control flow: give up on this MOV, and skip the outer
	     * scan ahead past it (nothing before the flow-control
	     * instruction can coalesce across it either).
	     */
	    interfered = true;
	    iter = scan_iter;
	    break;
	 }

	 if (scan_inst->dst.file == GRF) {
	    /* TEX writes several contiguous regs, so any reg_offset of
	     * its destination counts as a write to ours.
	     */
	    if (scan_inst->dst.reg == inst->dst.reg &&
		(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
		 scan_inst->opcode == FS_OPCODE_TEX)) {
	       interfered = true;
	       break;
	    }
	    if (scan_inst->dst.reg == inst->src[0].reg &&
		(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
		 scan_inst->opcode == FS_OPCODE_TEX)) {
	       interfered = true;
	       break;
	    }
	 }
      }
      if (interfered) {
	 continue;
      }

      /* Update live interval so we don't have to recalculate. */
      this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg],
						     virtual_grf_use[inst->dst.reg]);

      /* Rewrite the later usage to point at the source of the move to
       * be removed.
       */
      for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
	   scan_iter.next()) {
	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();

	 for (int i = 0; i < 3; i++) {
	    if (scan_inst->src[i].file == GRF &&
		scan_inst->src[i].reg == inst->dst.reg &&
		scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
	       scan_inst->src[i].reg = inst->src[0].reg;
	       scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
	       /* Fold the MOV's source modifiers into the reader. */
	       scan_inst->src[i].abs |= inst->src[0].abs;
	       scan_inst->src[i].negate ^= inst->src[0].negate;
	       scan_inst->src[i].smear = inst->src[0].smear;
	    }
	 }
      }

      inst->remove();
      progress = true;
   }

   return progress;
}


/* Removes GRF-to-MRF MOVs by making the instruction that computed the
 * GRF value write directly into the MRF, when the GRF has no later
 * readers and nothing between the producer and the MOV interferes.
 */
bool
fs_visitor::compute_to_mrf()
{
   bool progress = false;
   int next_ip = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      int ip = next_ip;
      next_ip++;

      if (inst->opcode != BRW_OPCODE_MOV ||
	  inst->predicated ||
	  inst->dst.file != MRF || inst->src[0].file != GRF ||
	  inst->dst.type != inst->src[0].type ||
	  inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1)
	 continue;

      /* Can't compute-to-MRF this GRF if someone else was going to
       * read it later.
       */
      if (this->virtual_grf_use[inst->src[0].reg] > ip)
	 continue;

      /* Found a move of a GRF to a MRF.  Let's see if we can go
       * rewrite the thing that made this GRF to write into the MRF.
       */
      bool found = false;
      fs_inst *scan_inst;
      for (scan_inst = (fs_inst *)inst->prev;
	   scan_inst->prev != NULL;
	   scan_inst = (fs_inst *)scan_inst->prev) {
	 /* We don't handle flow control here.  Most computation of
	  * values that end up in MRFs are shortly before the MRF
	  * write anyway.
	  */
	 if (scan_inst->opcode == BRW_OPCODE_DO ||
	     scan_inst->opcode == BRW_OPCODE_WHILE ||
	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
	    break;
	 }

	 /* You can't read from an MRF, so if someone else reads our
	  * MRF's source GRF that we wanted to rewrite, that stops us.
	  */
	 bool interfered = false;
	 for (int i = 0; i < 3; i++) {
	    if (scan_inst->src[i].file == GRF &&
		scan_inst->src[i].reg == inst->src[0].reg &&
		scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
	       interfered = true;
	    }
	 }
	 if (interfered)
	    break;

	 if (scan_inst->dst.file == MRF &&
	     scan_inst->dst.hw_reg == inst->dst.hw_reg) {
	    /* Somebody else wrote our MRF here, so we can't can't
	     * compute-to-MRF before that.
	     */
	    break;
	 }

	 if (scan_inst->mlen > 0) {
	    /* Found a SEND instruction, which will do some amount of
	     * implied write that may overwrite our MRF that we were
	     * hoping to compute-to-MRF somewhere above it.  Nothing
	     * we have implied-writes more than 2 MRFs from base_mrf,
	     * though.
	     */
	    int implied_write_len = MIN2(scan_inst->mlen, 2);
	    if (inst->dst.hw_reg >= scan_inst->base_mrf &&
		inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) {
	       break;
	    }
	 }

	 if (scan_inst->dst.file == GRF &&
	     scan_inst->dst.reg == inst->src[0].reg) {
	    /* Found the last thing to write our reg we want to turn
	     * into a compute-to-MRF.
	     */

	    if (scan_inst->opcode == FS_OPCODE_TEX) {
	       /* texturing writes several continuous regs, so we can't
		* compute-to-mrf that.
		*/
	       break;
	    }

	    /* If it's predicated, it (probably) didn't populate all
	     * the channels.
	     */
	    if (scan_inst->predicated)
	       break;

	    /* SEND instructions can't have MRF as a destination. */
	    if (scan_inst->mlen)
	       break;

	    if (intel->gen >= 6) {
	       /* gen6 math instructions must have the destination be
		* GRF, so no compute-to-MRF for them.
		*/
	       if (scan_inst->opcode == FS_OPCODE_RCP ||
		   scan_inst->opcode == FS_OPCODE_RSQ ||
		   scan_inst->opcode == FS_OPCODE_SQRT ||
		   scan_inst->opcode == FS_OPCODE_EXP2 ||
		   scan_inst->opcode == FS_OPCODE_LOG2 ||
		   scan_inst->opcode == FS_OPCODE_SIN ||
		   scan_inst->opcode == FS_OPCODE_COS ||
		   scan_inst->opcode == FS_OPCODE_POW) {
		  break;
	       }
	    }

	    if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
	       /* Found the creator of our MRF's source value. */
	       found = true;
	       break;
	    }
	 }
      }
      if (found) {
	 /* Retarget the producer at the MRF and drop the MOV. */
	 scan_inst->dst.file = MRF;
	 scan_inst->dst.hw_reg = inst->dst.hw_reg;
	 scan_inst->saturate |= inst->saturate;
	 inst->remove();
	 progress = true;
      }
   }

   return progress;
}

/* Returns whether the live intervals of virtual GRFs a and b overlap.
 * A use of -1 marks a dead register (see dead_code_eliminate()); for
 * those, only its def point is checked against the other's range.
 */
bool
fs_visitor::virtual_grf_interferes(int a, int b)
{
   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);

   /* For dead code, just check if the def interferes with the other range. */
   if (this->virtual_grf_use[a] == -1) {
      return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
	      this->virtual_grf_def[a] < this->virtual_grf_use[b]);
   }
   if (this->virtual_grf_use[b] == -1) {
      return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
	      this->virtual_grf_def[b] < this->virtual_grf_use[a]);
   }

   return start < end;
}

/* Translates an fs_reg (post register allocation, so hw_reg is valid)
 * into the brw_reg form the EU emitter consumes, applying smear,
 * type, abs, and negate.
 */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      if (reg->smear == -1) {
	 brw_reg = brw_vec8_reg(reg->file,
				reg->hw_reg, 0);
      } else {
	 /* Smear picks a single channel and replicates it. */
	 brw_reg = brw_vec1_reg(reg->file,
				reg->hw_reg, reg->smear);
      }
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
	 brw_reg = brw_imm_f(reg->imm.f);
	 break;
      case BRW_REGISTER_TYPE_D:
	 brw_reg = brw_imm_d(reg->imm.i);
	 break;
      case BRW_REGISTER_TYPE_UD:
	 brw_reg = brw_imm_ud(reg->imm.u);
	 break;
      default:
	 assert(!"not reached");
	 break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      /* UNIFORM sources should all have been lowered by
       * assign_curb_setup()/setup_pull_constants() by now.
       */
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}

/* Walks the FS IR instruction list and emits native gen instructions
 * through the brw_eu emitter in p, tracking IF/ELSE/ENDIF and
 * DO/WHILE nesting so branch targets can be patched.  Sets this->fail
 * on unsupported opcodes.
 */
void
fs_visitor::generate_code()
{
   int last_native_inst = 0;
   /* Stacks for pending control-flow instructions awaiting their
    * matching ELSE/ENDIF/WHILE; depth is capped at 16 (asserted for IF).
    */
   struct brw_instruction *if_stack[16], *loop_stack[16];
   int if_stack_depth = 0, loop_stack_depth = 0;
   int if_depth_in_loop[16];
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("Native code for fragment shader %d:\n",
	     ctx->Shader.CurrentProgram->Name);
   }

   if_depth_in_loop[loop_stack_depth] = 0;

   memset(&if_stack, 0, sizeof(if_stack));
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();
      struct brw_reg src[3], dst;

      if (INTEL_DEBUG & DEBUG_WM) {
	 /* Print the source IR / annotation when it changes, so the
	  * disassembly is grouped by what generated it.
	  */
	 if (last_annotation_ir != inst->ir) {
	    last_annotation_ir = inst->ir;
	    if (last_annotation_ir) {
	       printf(" ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != inst->annotation) {
	    last_annotation_string = inst->annotation;
	    if (last_annotation_string)
	       printf(" %s\n", last_annotation_string);
	 }
      }

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDE:
	 brw_RNDE(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_NOT:
	 brw_NOT(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ASR:
	 brw_ASR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHR:
	 brw_SHR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHL:
	 brw_SHL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 assert(if_stack_depth < 16);
	 if (inst->src[0].file != BAD_FILE) {
	    /* gen6+ can fold the comparison into the IF itself. */
	    assert(intel->gen >= 6);
	    if_stack[if_stack_depth] = brw_IF_gen6(p, inst->conditional_mod, src[0], src[1]);
	 } else {
	    if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
	 }
	 if_depth_in_loop[loop_stack_depth]++;
	 if_stack_depth++;
	 break;

      case BRW_OPCODE_ELSE:
	 if_stack[if_stack_depth - 1] =
	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
	 break;
      case BRW_OPCODE_ENDIF:
	 if_stack_depth--;
	 brw_ENDIF(p , if_stack[if_stack_depth]);
	 if_depth_in_loop[loop_stack_depth]--;
	 break;

      case BRW_OPCODE_DO:
	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
	 if_depth_in_loop[loop_stack_depth] = 0;
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE: {
	 struct brw_instruction *inst0, *inst1;
	 /* Jump distances are counted in units that doubled on gen5+. */
	 GLuint br = 1;

	 if (intel->gen >= 5)
	    br = 2;

	 assert(loop_stack_depth > 0);
	 loop_stack_depth--;
	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
	 /* patch all the BREAK/CONT instructions from last BGNLOOP */
	 while (inst0 > loop_stack[loop_stack_depth]) {
	    inst0--;
	    if (inst0->header.opcode == BRW_OPCODE_BREAK &&
		inst0->bits3.if_else.jump_count == 0) {
	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
	    }
	    else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
		     inst0->bits3.if_else.jump_count == 0) {
	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
	    }
	 }
      }
      break;

      case FS_OPCODE_RCP:
      case FS_OPCODE_RSQ:
      case FS_OPCODE_SQRT:
      case FS_OPCODE_EXP2:
      case FS_OPCODE_LOG2:
      case FS_OPCODE_POW:
      case FS_OPCODE_SIN:
      case FS_OPCODE_COS:
	 generate_math(inst, dst, src);
	 break;
      case FS_OPCODE_LINTERP:
	 generate_linterp(inst, dst, src);
	 break;
      case FS_OPCODE_TEX:
      case FS_OPCODE_TXB:
      case FS_OPCODE_TXL:
	 generate_tex(inst, dst);
	 break;
      case FS_OPCODE_DISCARD_NOT:
	 generate_discard_not(inst, dst);
	 break;
      case FS_OPCODE_DISCARD_AND:
	 generate_discard_and(inst, src[0]);
	 break;
      case FS_OPCODE_DDX:
	 generate_ddx(inst, dst, src[0]);
	 break;
      case FS_OPCODE_DDY:
	 generate_ddy(inst, dst, src[0]);
	 break;

      case FS_OPCODE_SPILL:
	 generate_spill(inst, src[0]);
	 break;

      case FS_OPCODE_UNSPILL:
	 generate_unspill(inst, dst);
	 break;

      case FS_OPCODE_PULL_CONSTANT_LOAD:
	 generate_pull_constant_load(inst, dst);
	 break;

      case FS_OPCODE_FB_WRITE:
	 generate_fb_write(inst);
	 break;
      default:
	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
			  brw_opcodes[inst->opcode].name);
	 } else {
	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
	 }
	 this->fail = true;
      }

      if (INTEL_DEBUG & DEBUG_WM) {
	 /* Disassemble the native instructions this fs_inst produced. */
	 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
	    if (0) {
	       printf("0x%08x 0x%08x 0x%08x 0x%08x ",
		      ((uint32_t *)&p->store[i])[3],
		      ((uint32_t *)&p->store[i])[2],
		      ((uint32_t *)&p->store[i])[1],
		      ((uint32_t *)&p->store[i])[0]);
	    }
	    brw_disasm(stdout, &p->store[i], intel->gen);
	    printf("\n");
	 }
      }

      last_native_inst = p->nr_insn;
   }
}

/* Top-level driver: visits the linked fragment shader's GLSL IR,
 * builds the FS IR, runs the optimization/allocation passes, and
 * emits native code.  Returns GL_FALSE if no program is bound or
 * compilation failed.
 */
GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;

   if (!prog)
      return GL_FALSE;

   struct brw_shader *shader =
     (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
   if (!shader)
      return GL_FALSE;

   /* We always use 8-wide mode, at least for now.  For one, flow
    * control only works in 8-wide.  Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      v.emit_dummy_fs();
   } else {
      v.calculate_urb_setup();
      if (intel->gen < 6)
	 v.emit_interpolation_setup_gen4();
      else
	 v.emit_interpolation_setup_gen6();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 v.base_ir = ir;
	 ir->accept(&v);
      }

      v.emit_fb_writes();

      v.split_virtual_grfs();
      v.setup_pull_constants();

      v.assign_curb_setup();
      v.assign_urb_setup();

      /* Run the optimization passes to a fixed point. */
      bool progress;
      do {
	 progress = false;
	 v.calculate_live_intervals();
	 progress = v.propagate_constants() || progress;
	 progress = v.register_coalesce() || progress;
	 progress = v.compute_to_mrf() || progress;
	 progress = v.dead_code_eliminate() || progress;
      } while (progress);

      if (0) {
	 /* Debug of register spilling: Go spill everything. */
	 int virtual_grf_count = v.virtual_grf_next;
	 for (int i = 1; i < virtual_grf_count; i++) {
	    v.spill_reg(i);
	 }
	 v.calculate_live_intervals();
      }

      if (0)
	 v.assign_regs_trivial();
      else {
	 /* Retry allocation after each spill until it succeeds or the
	  * visitor gives up.
	  */
	 while (!v.assign_regs()) {
	    if (v.fail)
	       break;

	    v.calculate_live_intervals();
	 }
      }
   }

   if (!v.fail)
      v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   c->prog_data.total_grf = v.grf_used;

   return GL_TRUE;
}