brw_fs.cpp revision c6dbf253d284f68b0d0e4a3c145583880855324b
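Before the listing itself, one recurring bookkeeping pattern is worth calling out: fs_visitor::virtual_grf_alloc() (early in the file) hands out "virtual GRF" numbers and records each register's size in a growable, doubling array, with slot 0 kept as an unused placeholder that the register allocator later treats as a dummy node. The following is a minimal, self-contained sketch of that pattern only; it substitutes plain realloc() for the driver's talloc allocator, and the names virtual_grf_table and virtual_grf_alloc_sketch are illustrative, not part of the source.

#include <cassert>
#include <cstdio>
#include <cstdlib>

struct virtual_grf_table {
   int *sizes;       /* size (in hw registers) of each virtual GRF */
   int next;         /* next virtual register number to hand out */
   int array_size;   /* current capacity of sizes[] */
};

/* next starts at 1: slot 0 stays an unused placeholder, mirroring the
 * "Node 0 is just a placeholder" convention in assign_regs().
 */
static struct virtual_grf_table table = { NULL, 1, 0 };

static int
virtual_grf_alloc_sketch(int size)
{
   if (table.array_size <= table.next) {
      table.array_size = table.array_size ? table.array_size * 2 : 16;
      table.sizes = (int *)realloc(table.sizes,
                                   table.array_size * sizeof(int));
      assert(table.sizes);
      table.sizes[0] = 0;   /* the reserved slot never holds a real size */
   }
   table.sizes[table.next] = size;
   return table.next++;
}

int
main(void)
{
   /* e.g. a vec4 temporary occupies 4 registers in SIMD8 mode */
   int r = virtual_grf_alloc_sketch(4);
   printf("allocated virtual GRF %d, size %d\n", r, table.sizes[r]);
   return 0;
}

The real implementation differs only in using the compile's talloc memory context and in being a method on fs_visitor; the growth and indexing logic is the same.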
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "talloc.h"
}
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"

static int using_new_fs = -1;
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);

struct gl_shader *
brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = talloc_zero(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   return &shader->base;
}

struct gl_shader_program *
brw_new_shader_program(GLcontext *ctx, GLuint name)
{
   struct brw_shader_program *prog;
   prog = talloc_zero(NULL, struct brw_shader_program);
   if (prog) {
      prog->base.Name = name;
      _mesa_init_shader_program(ctx, &prog->base);
   }
   return &prog->base;
}

GLboolean
brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
{
   if (!_mesa_ir_compile_shader(ctx, shader))
      return GL_FALSE;

   return GL_TRUE;
}

GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   struct intel_context *intel = intel_context(ctx);
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
         void *mem_ctx = talloc_new(NULL);
         bool progress;

         if (shader->ir)
            talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

         do_mat_op_to_vec(shader->ir);
         do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);
         do_lower_texture_projection(shader->ir);
         brw_do_cubemap_normalize(shader->ir);

         do {
            progress = false;

            brw_do_channel_expressions(shader->ir);
            brw_do_vector_splitting(shader->ir);

            progress = do_lower_jumps(shader->ir, true, true,
                                      true, /* main return */
                                      false, /* continue */
                                      false /* loops */
                                      ) || progress;

            progress = do_common_optimization(shader->ir, true, 32) || progress;

            progress = lower_noise(shader->ir) || progress;
            progress =
               lower_variable_index_to_cond_assign(shader->ir,
                                                   GL_TRUE, /* input */
                                                   GL_TRUE, /* output */
                                                   GL_TRUE, /* temp */
                                                   GL_TRUE /* uniform */
                                                   ) || progress;
            if (intel->gen == 6) {
               progress = do_if_to_cond_assign(shader->ir) || progress;
            }
         } while (progress);

         validate_ir_tree(shader->ir);

         reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}

static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return type->components();
   case GLSL_TYPE_ARRAY:
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up no register space, since they're baked in at
       * link time.
       */
      return 0;
   default:
      assert(!"not reached");
      return 0;
   }
}

static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);

int
fs_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_next) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
                                         int, virtual_grf_array_size);

      /* This slot is always unused. */
      virtual_grf_sizes[0] = 0;
   }
   virtual_grf_sizes[virtual_grf_next] = size;
   return virtual_grf_next++;
}

/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   this->type = BRW_REGISTER_TYPE_F;
}

int
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   default:
      assert(!"not reached");
      return BRW_REGISTER_TYPE_F;
   }
}

/** Automatic reg constructor.
*/ 242fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 243{ 244 init(); 245 246 this->file = GRF; 247 this->reg = v->virtual_grf_alloc(type_size(type)); 248 this->reg_offset = 0; 249 this->type = brw_type_for_base_type(type); 250} 251 252fs_reg * 253fs_visitor::variable_storage(ir_variable *var) 254{ 255 return (fs_reg *)hash_table_find(this->variable_ht, var); 256} 257 258/* Our support for uniforms is piggy-backed on the struct 259 * gl_fragment_program, because that's where the values actually 260 * get stored, rather than in some global gl_shader_program uniform 261 * store. 262 */ 263int 264fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 265{ 266 unsigned int offset = 0; 267 float *vec_values; 268 269 if (type->is_matrix()) { 270 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 271 type->vector_elements, 272 1); 273 274 for (unsigned int i = 0; i < type->matrix_columns; i++) { 275 offset += setup_uniform_values(loc + offset, column); 276 } 277 278 return offset; 279 } 280 281 switch (type->base_type) { 282 case GLSL_TYPE_FLOAT: 283 case GLSL_TYPE_UINT: 284 case GLSL_TYPE_INT: 285 case GLSL_TYPE_BOOL: 286 vec_values = fp->Base.Parameters->ParameterValues[loc]; 287 for (unsigned int i = 0; i < type->vector_elements; i++) { 288 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 289 } 290 return 1; 291 292 case GLSL_TYPE_STRUCT: 293 for (unsigned int i = 0; i < type->length; i++) { 294 offset += setup_uniform_values(loc + offset, 295 type->fields.structure[i].type); 296 } 297 return offset; 298 299 case GLSL_TYPE_ARRAY: 300 for (unsigned int i = 0; i < type->length; i++) { 301 offset += setup_uniform_values(loc + offset, type->fields.array); 302 } 303 return offset; 304 305 case GLSL_TYPE_SAMPLER: 306 /* The sampler takes up a slot, but we don't use any values from it. */ 307 return 1; 308 309 default: 310 assert(!"not reached"); 311 return 0; 312 } 313} 314 315 316/* Our support for builtin uniforms is even scarier than non-builtin. 317 * It sits on top of the PROG_STATE_VAR parameters that are 318 * automatically updated from GL context state. 319 */ 320void 321fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 322{ 323 const struct gl_builtin_uniform_desc *statevar = NULL; 324 325 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 326 statevar = &_mesa_builtin_uniform_desc[i]; 327 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 328 break; 329 } 330 331 if (!statevar->name) { 332 this->fail = true; 333 printf("Failed to find builtin uniform `%s'\n", ir->name); 334 return; 335 } 336 337 int array_count; 338 if (ir->type->is_array()) { 339 array_count = ir->type->length; 340 } else { 341 array_count = 1; 342 } 343 344 for (int a = 0; a < array_count; a++) { 345 for (unsigned int i = 0; i < statevar->num_elements; i++) { 346 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 347 int tokens[STATE_LENGTH]; 348 349 memcpy(tokens, element->tokens, sizeof(element->tokens)); 350 if (ir->type->is_array()) { 351 tokens[1] = a; 352 } 353 354 /* This state reference has already been setup by ir_to_mesa, 355 * but we'll get the same index back here. 356 */ 357 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 358 (gl_state_index *)tokens); 359 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 360 361 /* Add each of the unique swizzles of the element as a 362 * parameter. 
This'll end up matching the expected layout of 363 * the array/matrix/structure we're trying to fill in. 364 */ 365 int last_swiz = -1; 366 for (unsigned int i = 0; i < 4; i++) { 367 int swiz = GET_SWZ(element->swizzle, i); 368 if (swiz == last_swiz) 369 break; 370 last_swiz = swiz; 371 372 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; 373 } 374 } 375 } 376} 377 378fs_reg * 379fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 380{ 381 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 382 fs_reg wpos = *reg; 383 fs_reg neg_y = this->pixel_y; 384 neg_y.negate = true; 385 386 /* gl_FragCoord.x */ 387 if (ir->pixel_center_integer) { 388 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 389 } else { 390 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 391 } 392 wpos.reg_offset++; 393 394 /* gl_FragCoord.y */ 395 if (ir->origin_upper_left && ir->pixel_center_integer) { 396 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 397 } else { 398 fs_reg pixel_y = this->pixel_y; 399 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 400 401 if (!ir->origin_upper_left) { 402 pixel_y.negate = true; 403 offset += c->key.drawable_height - 1.0; 404 } 405 406 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 407 } 408 wpos.reg_offset++; 409 410 /* gl_FragCoord.z */ 411 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 412 interp_reg(FRAG_ATTRIB_WPOS, 2))); 413 wpos.reg_offset++; 414 415 /* gl_FragCoord.w: Already set up in emit_interpolation */ 416 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 417 418 return reg; 419} 420 421fs_reg * 422fs_visitor::emit_general_interpolation(ir_variable *ir) 423{ 424 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 425 /* Interpolation is always in floating point regs. */ 426 reg->type = BRW_REGISTER_TYPE_F; 427 fs_reg attr = *reg; 428 429 unsigned int array_elements; 430 const glsl_type *type; 431 432 if (ir->type->is_array()) { 433 array_elements = ir->type->length; 434 if (array_elements == 0) { 435 this->fail = true; 436 } 437 type = ir->type->fields.array; 438 } else { 439 array_elements = 1; 440 type = ir->type; 441 } 442 443 int location = ir->location; 444 for (unsigned int i = 0; i < array_elements; i++) { 445 for (unsigned int j = 0; j < type->matrix_columns; j++) { 446 if (urb_setup[location] == -1) { 447 /* If there's no incoming setup data for this slot, don't 448 * emit interpolation for it. 449 */ 450 attr.reg_offset += type->vector_elements; 451 location++; 452 continue; 453 } 454 455 for (unsigned int c = 0; c < type->vector_elements; c++) { 456 struct brw_reg interp = interp_reg(location, c); 457 emit(fs_inst(FS_OPCODE_LINTERP, 458 attr, 459 this->delta_x, 460 this->delta_y, 461 fs_reg(interp))); 462 attr.reg_offset++; 463 } 464 465 if (intel->gen < 6) { 466 attr.reg_offset -= type->vector_elements; 467 for (unsigned int c = 0; c < type->vector_elements; c++) { 468 emit(fs_inst(BRW_OPCODE_MUL, 469 attr, 470 attr, 471 this->pixel_w)); 472 attr.reg_offset++; 473 } 474 } 475 location++; 476 } 477 } 478 479 return reg; 480} 481 482fs_reg * 483fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) 484{ 485 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 486 487 /* The frontfacing comes in as a bit in the thread payload. 
*/ 488 if (intel->gen >= 6) { 489 emit(fs_inst(BRW_OPCODE_ASR, 490 *reg, 491 fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), 492 fs_reg(15))); 493 emit(fs_inst(BRW_OPCODE_NOT, 494 *reg, 495 *reg)); 496 emit(fs_inst(BRW_OPCODE_AND, 497 *reg, 498 *reg, 499 fs_reg(1))); 500 } else { 501 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 502 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 503 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 504 * us front face 505 */ 506 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 507 *reg, 508 fs_reg(r1_6ud), 509 fs_reg(1u << 31))); 510 inst->conditional_mod = BRW_CONDITIONAL_L; 511 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 512 } 513 514 return reg; 515} 516 517fs_inst * 518fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) 519{ 520 switch (opcode) { 521 case FS_OPCODE_RCP: 522 case FS_OPCODE_RSQ: 523 case FS_OPCODE_SQRT: 524 case FS_OPCODE_EXP2: 525 case FS_OPCODE_LOG2: 526 case FS_OPCODE_SIN: 527 case FS_OPCODE_COS: 528 break; 529 default: 530 assert(!"not reached: bad math opcode"); 531 return NULL; 532 } 533 fs_inst *inst = emit(fs_inst(opcode, dst, src)); 534 535 inst->base_mrf = 2; 536 inst->mlen = 1; 537 538 return inst; 539} 540 541fs_inst * 542fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) 543{ 544 assert(opcode == FS_OPCODE_POW); 545 546 fs_inst *inst = emit(fs_inst(opcode, dst, src0, src1)); 547 548 inst->base_mrf = 2; 549 inst->mlen = 2; 550 551 return inst; 552} 553 554void 555fs_visitor::visit(ir_variable *ir) 556{ 557 fs_reg *reg = NULL; 558 559 if (variable_storage(ir)) 560 return; 561 562 if (strcmp(ir->name, "gl_FragColor") == 0) { 563 this->frag_color = ir; 564 } else if (strcmp(ir->name, "gl_FragData") == 0) { 565 this->frag_data = ir; 566 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 567 this->frag_depth = ir; 568 } 569 570 if (ir->mode == ir_var_in) { 571 if (!strcmp(ir->name, "gl_FragCoord")) { 572 reg = emit_fragcoord_interpolation(ir); 573 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 574 reg = emit_frontfacing_interpolation(ir); 575 } else { 576 reg = emit_general_interpolation(ir); 577 } 578 assert(reg); 579 hash_table_insert(this->variable_ht, reg, ir); 580 return; 581 } 582 583 if (ir->mode == ir_var_uniform) { 584 int param_index = c->prog_data.nr_params; 585 586 if (!strncmp(ir->name, "gl_", 3)) { 587 setup_builtin_uniform_values(ir); 588 } else { 589 setup_uniform_values(ir->location, ir->type); 590 } 591 592 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 593 } 594 595 if (!reg) 596 reg = new(this->mem_ctx) fs_reg(this, ir->type); 597 598 hash_table_insert(this->variable_ht, reg, ir); 599} 600 601void 602fs_visitor::visit(ir_dereference_variable *ir) 603{ 604 fs_reg *reg = variable_storage(ir->var); 605 this->result = *reg; 606} 607 608void 609fs_visitor::visit(ir_dereference_record *ir) 610{ 611 const glsl_type *struct_type = ir->record->type; 612 613 ir->record->accept(this); 614 615 unsigned int offset = 0; 616 for (unsigned int i = 0; i < struct_type->length; i++) { 617 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 618 break; 619 offset += type_size(struct_type->fields.structure[i].type); 620 } 621 this->result.reg_offset += offset; 622 this->result.type = brw_type_for_base_type(ir->type); 623} 624 625void 626fs_visitor::visit(ir_dereference_array *ir) 627{ 628 ir_constant *index; 629 int element_size; 630 631 ir->array->accept(this); 632 index = 
ir->array_index->as_constant(); 633 634 element_size = type_size(ir->type); 635 this->result.type = brw_type_for_base_type(ir->type); 636 637 if (index) { 638 assert(this->result.file == UNIFORM || 639 (this->result.file == GRF && 640 this->result.reg != 0)); 641 this->result.reg_offset += index->value.i[0] * element_size; 642 } else { 643 assert(!"FINISHME: non-constant array element"); 644 } 645} 646 647void 648fs_visitor::visit(ir_expression *ir) 649{ 650 unsigned int operand; 651 fs_reg op[2], temp; 652 fs_reg result; 653 fs_inst *inst; 654 655 for (operand = 0; operand < ir->get_num_operands(); operand++) { 656 ir->operands[operand]->accept(this); 657 if (this->result.file == BAD_FILE) { 658 ir_print_visitor v; 659 printf("Failed to get tree for expression operand:\n"); 660 ir->operands[operand]->accept(&v); 661 this->fail = true; 662 } 663 op[operand] = this->result; 664 665 /* Matrix expression operands should have been broken down to vector 666 * operations already. 667 */ 668 assert(!ir->operands[operand]->type->is_matrix()); 669 /* And then those vector operands should have been broken down to scalar. 670 */ 671 assert(!ir->operands[operand]->type->is_vector()); 672 } 673 674 /* Storage for our result. If our result goes into an assignment, it will 675 * just get copy-propagated out, so no worries. 676 */ 677 this->result = fs_reg(this, ir->type); 678 679 switch (ir->operation) { 680 case ir_unop_logic_not: 681 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 682 break; 683 case ir_unop_neg: 684 op[0].negate = !op[0].negate; 685 this->result = op[0]; 686 break; 687 case ir_unop_abs: 688 op[0].abs = true; 689 this->result = op[0]; 690 break; 691 case ir_unop_sign: 692 temp = fs_reg(this, ir->type); 693 694 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 695 696 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 697 inst->conditional_mod = BRW_CONDITIONAL_G; 698 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 699 inst->predicated = true; 700 701 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 702 inst->conditional_mod = BRW_CONDITIONAL_L; 703 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 704 inst->predicated = true; 705 706 break; 707 case ir_unop_rcp: 708 emit_math(FS_OPCODE_RCP, this->result, op[0]); 709 break; 710 711 case ir_unop_exp2: 712 emit_math(FS_OPCODE_EXP2, this->result, op[0]); 713 break; 714 case ir_unop_log2: 715 emit_math(FS_OPCODE_LOG2, this->result, op[0]); 716 break; 717 case ir_unop_exp: 718 case ir_unop_log: 719 assert(!"not reached: should be handled by ir_explog_to_explog2"); 720 break; 721 case ir_unop_sin: 722 emit_math(FS_OPCODE_SIN, this->result, op[0]); 723 break; 724 case ir_unop_cos: 725 emit_math(FS_OPCODE_COS, this->result, op[0]); 726 break; 727 728 case ir_unop_dFdx: 729 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 730 break; 731 case ir_unop_dFdy: 732 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 733 break; 734 735 case ir_binop_add: 736 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 737 break; 738 case ir_binop_sub: 739 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 740 break; 741 742 case ir_binop_mul: 743 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 744 break; 745 case ir_binop_div: 746 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 747 break; 748 case ir_binop_mod: 749 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 750 break; 751 752 case 
ir_binop_less: 753 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 754 inst->conditional_mod = BRW_CONDITIONAL_L; 755 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 756 break; 757 case ir_binop_greater: 758 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 759 inst->conditional_mod = BRW_CONDITIONAL_G; 760 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 761 break; 762 case ir_binop_lequal: 763 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 764 inst->conditional_mod = BRW_CONDITIONAL_LE; 765 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 766 break; 767 case ir_binop_gequal: 768 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 769 inst->conditional_mod = BRW_CONDITIONAL_GE; 770 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 771 break; 772 case ir_binop_equal: 773 case ir_binop_all_equal: /* same as nequal for scalars */ 774 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 775 inst->conditional_mod = BRW_CONDITIONAL_Z; 776 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 777 break; 778 case ir_binop_nequal: 779 case ir_binop_any_nequal: /* same as nequal for scalars */ 780 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 781 inst->conditional_mod = BRW_CONDITIONAL_NZ; 782 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 783 break; 784 785 case ir_binop_logic_xor: 786 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 787 break; 788 789 case ir_binop_logic_or: 790 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 791 break; 792 793 case ir_binop_logic_and: 794 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 795 break; 796 797 case ir_binop_dot: 798 case ir_binop_cross: 799 case ir_unop_any: 800 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 801 break; 802 803 case ir_unop_noise: 804 assert(!"not reached: should be handled by lower_noise"); 805 break; 806 807 case ir_unop_sqrt: 808 emit_math(FS_OPCODE_SQRT, this->result, op[0]); 809 break; 810 811 case ir_unop_rsq: 812 emit_math(FS_OPCODE_RSQ, this->result, op[0]); 813 break; 814 815 case ir_unop_i2f: 816 case ir_unop_b2f: 817 case ir_unop_b2i: 818 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 819 break; 820 case ir_unop_f2i: 821 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 822 break; 823 case ir_unop_f2b: 824 case ir_unop_i2b: 825 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 826 inst->conditional_mod = BRW_CONDITIONAL_NZ; 827 828 case ir_unop_trunc: 829 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 830 break; 831 case ir_unop_ceil: 832 op[0].negate = ~op[0].negate; 833 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 834 this->result.negate = true; 835 break; 836 case ir_unop_floor: 837 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 838 break; 839 case ir_unop_fract: 840 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 841 break; 842 843 case ir_binop_min: 844 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 845 inst->conditional_mod = BRW_CONDITIONAL_L; 846 847 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 848 inst->predicated = true; 849 break; 850 case ir_binop_max: 851 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 852 inst->conditional_mod = BRW_CONDITIONAL_G; 853 854 inst = emit(fs_inst(BRW_OPCODE_SEL, 
this->result, op[0], op[1])); 855 inst->predicated = true; 856 break; 857 858 case ir_binop_pow: 859 emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); 860 break; 861 862 case ir_unop_bit_not: 863 case ir_unop_u2f: 864 case ir_binop_lshift: 865 case ir_binop_rshift: 866 case ir_binop_bit_and: 867 case ir_binop_bit_xor: 868 case ir_binop_bit_or: 869 assert(!"GLSL 1.30 features unsupported"); 870 break; 871 } 872} 873 874void 875fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 876 const glsl_type *type, bool predicated) 877{ 878 switch (type->base_type) { 879 case GLSL_TYPE_FLOAT: 880 case GLSL_TYPE_UINT: 881 case GLSL_TYPE_INT: 882 case GLSL_TYPE_BOOL: 883 for (unsigned int i = 0; i < type->components(); i++) { 884 l.type = brw_type_for_base_type(type); 885 r.type = brw_type_for_base_type(type); 886 887 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 888 inst->predicated = predicated; 889 890 l.reg_offset++; 891 r.reg_offset++; 892 } 893 break; 894 case GLSL_TYPE_ARRAY: 895 for (unsigned int i = 0; i < type->length; i++) { 896 emit_assignment_writes(l, r, type->fields.array, predicated); 897 } 898 899 case GLSL_TYPE_STRUCT: 900 for (unsigned int i = 0; i < type->length; i++) { 901 emit_assignment_writes(l, r, type->fields.structure[i].type, 902 predicated); 903 } 904 break; 905 906 case GLSL_TYPE_SAMPLER: 907 break; 908 909 default: 910 assert(!"not reached"); 911 break; 912 } 913} 914 915void 916fs_visitor::visit(ir_assignment *ir) 917{ 918 struct fs_reg l, r; 919 fs_inst *inst; 920 921 /* FINISHME: arrays on the lhs */ 922 ir->lhs->accept(this); 923 l = this->result; 924 925 ir->rhs->accept(this); 926 r = this->result; 927 928 assert(l.file != BAD_FILE); 929 assert(r.file != BAD_FILE); 930 931 if (ir->condition) { 932 /* Get the condition bool into the predicate. */ 933 ir->condition->accept(this); 934 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0))); 935 inst->conditional_mod = BRW_CONDITIONAL_NZ; 936 } 937 938 if (ir->lhs->type->is_scalar() || 939 ir->lhs->type->is_vector()) { 940 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 941 if (ir->write_mask & (1 << i)) { 942 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 943 if (ir->condition) 944 inst->predicated = true; 945 r.reg_offset++; 946 } 947 l.reg_offset++; 948 } 949 } else { 950 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 951 } 952} 953 954fs_inst * 955fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 956{ 957 int mlen; 958 int base_mrf = 1; 959 bool simd16 = false; 960 fs_reg orig_dst; 961 962 /* g0 header. */ 963 mlen = 1; 964 965 if (ir->shadow_comparitor) { 966 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 967 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 968 coordinate)); 969 coordinate.reg_offset++; 970 } 971 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 972 mlen += 3; 973 974 if (ir->op == ir_tex) { 975 /* There's no plain shadow compare message, so we use shadow 976 * compare with a bias of 0.0. 
977 */ 978 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 979 fs_reg(0.0f))); 980 mlen++; 981 } else if (ir->op == ir_txb) { 982 ir->lod_info.bias->accept(this); 983 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 984 this->result)); 985 mlen++; 986 } else { 987 assert(ir->op == ir_txl); 988 ir->lod_info.lod->accept(this); 989 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 990 this->result)); 991 mlen++; 992 } 993 994 ir->shadow_comparitor->accept(this); 995 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 996 mlen++; 997 } else if (ir->op == ir_tex) { 998 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 999 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1000 coordinate)); 1001 coordinate.reg_offset++; 1002 } 1003 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1004 mlen += 3; 1005 } else { 1006 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 1007 * instructions. We'll need to do SIMD16 here. 1008 */ 1009 assert(ir->op == ir_txb || ir->op == ir_txl); 1010 1011 for (int i = 0; i < ir->coordinate->type->vector_elements * 2;) { 1012 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), 1013 coordinate)); 1014 coordinate.reg_offset++; 1015 } 1016 1017 /* lod/bias appears after u/v/r. */ 1018 mlen += 6; 1019 1020 if (ir->op == ir_txb) { 1021 ir->lod_info.bias->accept(this); 1022 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1023 this->result)); 1024 mlen++; 1025 } else { 1026 ir->lod_info.lod->accept(this); 1027 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1028 this->result)); 1029 mlen++; 1030 } 1031 1032 /* The unused upper half. */ 1033 mlen++; 1034 1035 /* Now, since we're doing simd16, the return is 2 interleaved 1036 * vec4s where the odd-indexed ones are junk. We'll need to move 1037 * this weirdness around to the expected layout. 1038 */ 1039 simd16 = true; 1040 orig_dst = dst; 1041 dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 1042 2)); 1043 dst.type = BRW_REGISTER_TYPE_F; 1044 } 1045 1046 fs_inst *inst = NULL; 1047 switch (ir->op) { 1048 case ir_tex: 1049 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1050 break; 1051 case ir_txb: 1052 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1053 break; 1054 case ir_txl: 1055 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1056 break; 1057 case ir_txd: 1058 case ir_txf: 1059 assert(!"GLSL 1.30 features unsupported"); 1060 break; 1061 } 1062 inst->base_mrf = base_mrf; 1063 inst->mlen = mlen; 1064 1065 if (simd16) { 1066 for (int i = 0; i < 4; i++) { 1067 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst)); 1068 orig_dst.reg_offset++; 1069 dst.reg_offset += 2; 1070 } 1071 } 1072 1073 return inst; 1074} 1075 1076fs_inst * 1077fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1078{ 1079 /* gen5's SIMD8 sampler has slots for u, v, r, array index, then 1080 * optional parameters like shadow comparitor or LOD bias. If 1081 * optional parameters aren't present, those base slots are 1082 * optional and don't need to be included in the message. 1083 * 1084 * We don't fill in the unnecessary slots regardless, which may 1085 * look surprising in the disassembly. 1086 */ 1087 int mlen = 1; /* g0 header always present. 
*/ 1088 int base_mrf = 1; 1089 1090 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1091 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1092 coordinate)); 1093 coordinate.reg_offset++; 1094 } 1095 mlen += ir->coordinate->type->vector_elements; 1096 1097 if (ir->shadow_comparitor) { 1098 mlen = MAX2(mlen, 5); 1099 1100 ir->shadow_comparitor->accept(this); 1101 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1102 mlen++; 1103 } 1104 1105 fs_inst *inst = NULL; 1106 switch (ir->op) { 1107 case ir_tex: 1108 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1109 break; 1110 case ir_txb: 1111 ir->lod_info.bias->accept(this); 1112 mlen = MAX2(mlen, 5); 1113 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1114 mlen++; 1115 1116 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1117 break; 1118 case ir_txl: 1119 ir->lod_info.lod->accept(this); 1120 mlen = MAX2(mlen, 5); 1121 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1122 mlen++; 1123 1124 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1125 break; 1126 case ir_txd: 1127 case ir_txf: 1128 assert(!"GLSL 1.30 features unsupported"); 1129 break; 1130 } 1131 inst->base_mrf = base_mrf; 1132 inst->mlen = mlen; 1133 1134 return inst; 1135} 1136 1137void 1138fs_visitor::visit(ir_texture *ir) 1139{ 1140 fs_inst *inst = NULL; 1141 1142 ir->coordinate->accept(this); 1143 fs_reg coordinate = this->result; 1144 1145 /* Should be lowered by do_lower_texture_projection */ 1146 assert(!ir->projector); 1147 1148 /* Writemasking doesn't eliminate channels on SIMD8 texture 1149 * samples, so don't worry about them. 1150 */ 1151 fs_reg dst = fs_reg(this, glsl_type::vec4_type); 1152 1153 if (intel->gen < 5) { 1154 inst = emit_texture_gen4(ir, dst, coordinate); 1155 } else { 1156 inst = emit_texture_gen5(ir, dst, coordinate); 1157 } 1158 1159 inst->sampler = 1160 _mesa_get_sampler_uniform_value(ir->sampler, 1161 ctx->Shader.CurrentProgram, 1162 &brw->fragment_program->Base); 1163 inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler]; 1164 1165 this->result = dst; 1166 1167 if (ir->shadow_comparitor) 1168 inst->shadow_compare = true; 1169 1170 if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { 1171 fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); 1172 1173 for (int i = 0; i < 4; i++) { 1174 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1175 fs_reg l = swizzle_dst; 1176 l.reg_offset += i; 1177 1178 if (swiz == SWIZZLE_ZERO) { 1179 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f))); 1180 } else if (swiz == SWIZZLE_ONE) { 1181 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f))); 1182 } else { 1183 fs_reg r = dst; 1184 r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1185 emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1186 } 1187 } 1188 this->result = swizzle_dst; 1189 } 1190} 1191 1192void 1193fs_visitor::visit(ir_swizzle *ir) 1194{ 1195 ir->val->accept(this); 1196 fs_reg val = this->result; 1197 1198 if (ir->type->vector_elements == 1) { 1199 this->result.reg_offset += ir->mask.x; 1200 return; 1201 } 1202 1203 fs_reg result = fs_reg(this, ir->type); 1204 this->result = result; 1205 1206 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1207 fs_reg channel = val; 1208 int swiz = 0; 1209 1210 switch (i) { 1211 case 0: 1212 swiz = ir->mask.x; 1213 break; 1214 case 1: 1215 swiz = ir->mask.y; 1216 break; 1217 case 2: 1218 swiz = ir->mask.z; 1219 break; 1220 case 3: 1221 swiz = ir->mask.w; 1222 break; 1223 } 1224 1225 channel.reg_offset 
+= swiz; 1226 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 1227 result.reg_offset++; 1228 } 1229} 1230 1231void 1232fs_visitor::visit(ir_discard *ir) 1233{ 1234 fs_reg temp = fs_reg(this, glsl_type::uint_type); 1235 1236 assert(ir->condition == NULL); /* FINISHME */ 1237 1238 emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null)); 1239 emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null, temp)); 1240 kill_emitted = true; 1241} 1242 1243void 1244fs_visitor::visit(ir_constant *ir) 1245{ 1246 fs_reg reg(this, ir->type); 1247 this->result = reg; 1248 1249 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1250 switch (ir->type->base_type) { 1251 case GLSL_TYPE_FLOAT: 1252 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); 1253 break; 1254 case GLSL_TYPE_UINT: 1255 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); 1256 break; 1257 case GLSL_TYPE_INT: 1258 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); 1259 break; 1260 case GLSL_TYPE_BOOL: 1261 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); 1262 break; 1263 default: 1264 assert(!"Non-float/uint/int/bool constant"); 1265 } 1266 reg.reg_offset++; 1267 } 1268} 1269 1270void 1271fs_visitor::visit(ir_if *ir) 1272{ 1273 fs_inst *inst; 1274 1275 /* Don't point the annotation at the if statement, because then it plus 1276 * the then and else blocks get printed. 1277 */ 1278 this->base_ir = ir->condition; 1279 1280 /* Generate the condition into the condition code. */ 1281 ir->condition->accept(this); 1282 inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result)); 1283 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1284 1285 inst = emit(fs_inst(BRW_OPCODE_IF)); 1286 inst->predicated = true; 1287 1288 foreach_iter(exec_list_iterator, iter, ir->then_instructions) { 1289 ir_instruction *ir = (ir_instruction *)iter.get(); 1290 this->base_ir = ir; 1291 1292 ir->accept(this); 1293 } 1294 1295 if (!ir->else_instructions.is_empty()) { 1296 emit(fs_inst(BRW_OPCODE_ELSE)); 1297 1298 foreach_iter(exec_list_iterator, iter, ir->else_instructions) { 1299 ir_instruction *ir = (ir_instruction *)iter.get(); 1300 this->base_ir = ir; 1301 1302 ir->accept(this); 1303 } 1304 } 1305 1306 emit(fs_inst(BRW_OPCODE_ENDIF)); 1307} 1308 1309void 1310fs_visitor::visit(ir_loop *ir) 1311{ 1312 fs_reg counter = reg_undef; 1313 1314 if (ir->counter) { 1315 this->base_ir = ir->counter; 1316 ir->counter->accept(this); 1317 counter = *(variable_storage(ir->counter)); 1318 1319 if (ir->from) { 1320 this->base_ir = ir->from; 1321 ir->from->accept(this); 1322 1323 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result)); 1324 } 1325 } 1326 1327 emit(fs_inst(BRW_OPCODE_DO)); 1328 1329 if (ir->to) { 1330 this->base_ir = ir->to; 1331 ir->to->accept(this); 1332 1333 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, 1334 counter, this->result)); 1335 switch (ir->cmp) { 1336 case ir_binop_equal: 1337 inst->conditional_mod = BRW_CONDITIONAL_Z; 1338 break; 1339 case ir_binop_nequal: 1340 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1341 break; 1342 case ir_binop_gequal: 1343 inst->conditional_mod = BRW_CONDITIONAL_GE; 1344 break; 1345 case ir_binop_lequal: 1346 inst->conditional_mod = BRW_CONDITIONAL_LE; 1347 break; 1348 case ir_binop_greater: 1349 inst->conditional_mod = BRW_CONDITIONAL_G; 1350 break; 1351 case ir_binop_less: 1352 inst->conditional_mod = BRW_CONDITIONAL_L; 1353 break; 1354 default: 1355 assert(!"not reached: unknown loop condition"); 1356 this->fail = true; 1357 break; 1358 } 1359 1360 inst = 
emit(fs_inst(BRW_OPCODE_BREAK)); 1361 inst->predicated = true; 1362 } 1363 1364 foreach_iter(exec_list_iterator, iter, ir->body_instructions) { 1365 ir_instruction *ir = (ir_instruction *)iter.get(); 1366 1367 this->base_ir = ir; 1368 ir->accept(this); 1369 } 1370 1371 if (ir->increment) { 1372 this->base_ir = ir->increment; 1373 ir->increment->accept(this); 1374 emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result)); 1375 } 1376 1377 emit(fs_inst(BRW_OPCODE_WHILE)); 1378} 1379 1380void 1381fs_visitor::visit(ir_loop_jump *ir) 1382{ 1383 switch (ir->mode) { 1384 case ir_loop_jump::jump_break: 1385 emit(fs_inst(BRW_OPCODE_BREAK)); 1386 break; 1387 case ir_loop_jump::jump_continue: 1388 emit(fs_inst(BRW_OPCODE_CONTINUE)); 1389 break; 1390 } 1391} 1392 1393void 1394fs_visitor::visit(ir_call *ir) 1395{ 1396 assert(!"FINISHME"); 1397} 1398 1399void 1400fs_visitor::visit(ir_return *ir) 1401{ 1402 assert(!"FINISHME"); 1403} 1404 1405void 1406fs_visitor::visit(ir_function *ir) 1407{ 1408 /* Ignore function bodies other than main() -- we shouldn't see calls to 1409 * them since they should all be inlined before we get to ir_to_mesa. 1410 */ 1411 if (strcmp(ir->name, "main") == 0) { 1412 const ir_function_signature *sig; 1413 exec_list empty; 1414 1415 sig = ir->matching_signature(&empty); 1416 1417 assert(sig); 1418 1419 foreach_iter(exec_list_iterator, iter, sig->body) { 1420 ir_instruction *ir = (ir_instruction *)iter.get(); 1421 this->base_ir = ir; 1422 1423 ir->accept(this); 1424 } 1425 } 1426} 1427 1428void 1429fs_visitor::visit(ir_function_signature *ir) 1430{ 1431 assert(!"not reached"); 1432 (void)ir; 1433} 1434 1435fs_inst * 1436fs_visitor::emit(fs_inst inst) 1437{ 1438 fs_inst *list_inst = new(mem_ctx) fs_inst; 1439 *list_inst = inst; 1440 1441 list_inst->annotation = this->current_annotation; 1442 list_inst->ir = this->base_ir; 1443 1444 this->instructions.push_tail(list_inst); 1445 1446 return list_inst; 1447} 1448 1449/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1450void 1451fs_visitor::emit_dummy_fs() 1452{ 1453 /* Everyone's favorite color. */ 1454 emit(fs_inst(BRW_OPCODE_MOV, 1455 fs_reg(MRF, 2), 1456 fs_reg(1.0f))); 1457 emit(fs_inst(BRW_OPCODE_MOV, 1458 fs_reg(MRF, 3), 1459 fs_reg(0.0f))); 1460 emit(fs_inst(BRW_OPCODE_MOV, 1461 fs_reg(MRF, 4), 1462 fs_reg(1.0f))); 1463 emit(fs_inst(BRW_OPCODE_MOV, 1464 fs_reg(MRF, 5), 1465 fs_reg(0.0f))); 1466 1467 fs_inst *write; 1468 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1469 fs_reg(0), 1470 fs_reg(0))); 1471 write->base_mrf = 0; 1472} 1473 1474/* The register location here is relative to the start of the URB 1475 * data. It will get adjusted to be a real location before 1476 * generate_code() time. 1477 */ 1478struct brw_reg 1479fs_visitor::interp_reg(int location, int channel) 1480{ 1481 int regnr = urb_setup[location] * 2 + channel / 2; 1482 int stride = (channel & 1) * 4; 1483 1484 assert(urb_setup[location] != -1); 1485 1486 return brw_vec1_grf(regnr, stride); 1487} 1488 1489/** Emits the interpolation for the varying inputs. 
*/ 1490void 1491fs_visitor::emit_interpolation_setup_gen4() 1492{ 1493 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1494 1495 this->current_annotation = "compute pixel centers"; 1496 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1497 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1498 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1499 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1500 emit(fs_inst(BRW_OPCODE_ADD, 1501 this->pixel_x, 1502 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1503 fs_reg(brw_imm_v(0x10101010)))); 1504 emit(fs_inst(BRW_OPCODE_ADD, 1505 this->pixel_y, 1506 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1507 fs_reg(brw_imm_v(0x11001100)))); 1508 1509 this->current_annotation = "compute pixel deltas from v0"; 1510 if (brw->has_pln) { 1511 this->delta_x = fs_reg(this, glsl_type::vec2_type); 1512 this->delta_y = this->delta_x; 1513 this->delta_y.reg_offset++; 1514 } else { 1515 this->delta_x = fs_reg(this, glsl_type::float_type); 1516 this->delta_y = fs_reg(this, glsl_type::float_type); 1517 } 1518 emit(fs_inst(BRW_OPCODE_ADD, 1519 this->delta_x, 1520 this->pixel_x, 1521 fs_reg(negate(brw_vec1_grf(1, 0))))); 1522 emit(fs_inst(BRW_OPCODE_ADD, 1523 this->delta_y, 1524 this->pixel_y, 1525 fs_reg(negate(brw_vec1_grf(1, 1))))); 1526 1527 this->current_annotation = "compute pos.w and 1/pos.w"; 1528 /* Compute wpos.w. It's always in our setup, since it's needed to 1529 * interpolate the other attributes. 1530 */ 1531 this->wpos_w = fs_reg(this, glsl_type::float_type); 1532 emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, 1533 interp_reg(FRAG_ATTRIB_WPOS, 3))); 1534 /* Compute the pixel 1/W value from wpos.w. */ 1535 this->pixel_w = fs_reg(this, glsl_type::float_type); 1536 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 1537 this->current_annotation = NULL; 1538} 1539 1540/** Emits the interpolation for the varying inputs. */ 1541void 1542fs_visitor::emit_interpolation_setup_gen6() 1543{ 1544 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1545 1546 /* If the pixel centers end up used, the setup is the same as for gen4. 
*/ 1547 this->current_annotation = "compute pixel centers"; 1548 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1549 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1550 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1551 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1552 emit(fs_inst(BRW_OPCODE_ADD, 1553 this->pixel_x, 1554 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1555 fs_reg(brw_imm_v(0x10101010)))); 1556 emit(fs_inst(BRW_OPCODE_ADD, 1557 this->pixel_y, 1558 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1559 fs_reg(brw_imm_v(0x11001100)))); 1560 1561 this->current_annotation = "compute 1/pos.w"; 1562 this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); 1563 this->pixel_w = fs_reg(this, glsl_type::float_type); 1564 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 1565 1566 this->delta_x = fs_reg(brw_vec8_grf(2, 0)); 1567 this->delta_y = fs_reg(brw_vec8_grf(3, 0)); 1568 1569 this->current_annotation = NULL; 1570} 1571 1572void 1573fs_visitor::emit_fb_writes() 1574{ 1575 this->current_annotation = "FB write header"; 1576 GLboolean header_present = GL_TRUE; 1577 int nr = 0; 1578 1579 if (intel->gen >= 6 && 1580 !this->kill_emitted && 1581 c->key.nr_color_regions == 1) { 1582 header_present = false; 1583 } 1584 1585 if (header_present) { 1586 /* m0, m1 header */ 1587 nr += 2; 1588 } 1589 1590 if (c->key.aa_dest_stencil_reg) { 1591 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1592 fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); 1593 } 1594 1595 /* Reserve space for color. It'll be filled in per MRT below. */ 1596 int color_mrf = nr; 1597 nr += 4; 1598 1599 if (c->key.source_depth_to_render_target) { 1600 if (c->key.computes_depth) { 1601 /* Hand over gl_FragDepth. */ 1602 assert(this->frag_depth); 1603 fs_reg depth = *(variable_storage(this->frag_depth)); 1604 1605 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 1606 } else { 1607 /* Pass through the payload depth. 
*/ 1608 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1609 fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); 1610 } 1611 } 1612 1613 if (c->key.dest_depth_reg) { 1614 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1615 fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); 1616 } 1617 1618 fs_reg color = reg_undef; 1619 if (this->frag_color) 1620 color = *(variable_storage(this->frag_color)); 1621 else if (this->frag_data) 1622 color = *(variable_storage(this->frag_data)); 1623 1624 for (int target = 0; target < c->key.nr_color_regions; target++) { 1625 this->current_annotation = talloc_asprintf(this->mem_ctx, 1626 "FB write target %d", 1627 target); 1628 if (this->frag_color || this->frag_data) { 1629 for (int i = 0; i < 4; i++) { 1630 emit(fs_inst(BRW_OPCODE_MOV, 1631 fs_reg(MRF, color_mrf + i), 1632 color)); 1633 color.reg_offset++; 1634 } 1635 } 1636 1637 if (this->frag_color) 1638 color.reg_offset -= 4; 1639 1640 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1641 reg_undef, reg_undef)); 1642 inst->target = target; 1643 inst->base_mrf = 0; 1644 inst->mlen = nr; 1645 if (target == c->key.nr_color_regions - 1) 1646 inst->eot = true; 1647 inst->header_present = header_present; 1648 } 1649 1650 if (c->key.nr_color_regions == 0) { 1651 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1652 reg_undef, reg_undef)); 1653 inst->base_mrf = 0; 1654 inst->mlen = nr; 1655 inst->eot = true; 1656 inst->header_present = header_present; 1657 } 1658 1659 this->current_annotation = NULL; 1660} 1661 1662void 1663fs_visitor::generate_fb_write(fs_inst *inst) 1664{ 1665 GLboolean eot = inst->eot; 1666 struct brw_reg implied_header; 1667 1668 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 1669 * move, here's g1. 1670 */ 1671 brw_push_insn_state(p); 1672 brw_set_mask_control(p, BRW_MASK_DISABLE); 1673 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1674 1675 if (inst->header_present) { 1676 if (intel->gen >= 6) { 1677 brw_MOV(p, 1678 brw_message_reg(inst->base_mrf), 1679 brw_vec8_grf(0, 0)); 1680 implied_header = brw_null_reg(); 1681 } else { 1682 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); 1683 } 1684 1685 brw_MOV(p, 1686 brw_message_reg(inst->base_mrf + 1), 1687 brw_vec8_grf(1, 0)); 1688 } else { 1689 implied_header = brw_null_reg(); 1690 } 1691 1692 brw_pop_insn_state(p); 1693 1694 brw_fb_WRITE(p, 1695 8, /* dispatch_width */ 1696 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 1697 inst->base_mrf, 1698 implied_header, 1699 inst->target, 1700 inst->mlen, 1701 0, 1702 eot); 1703} 1704 1705void 1706fs_visitor::generate_linterp(fs_inst *inst, 1707 struct brw_reg dst, struct brw_reg *src) 1708{ 1709 struct brw_reg delta_x = src[0]; 1710 struct brw_reg delta_y = src[1]; 1711 struct brw_reg interp = src[2]; 1712 1713 if (brw->has_pln && 1714 delta_y.nr == delta_x.nr + 1 && 1715 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { 1716 brw_PLN(p, dst, interp, delta_x); 1717 } else { 1718 brw_LINE(p, brw_null_reg(), interp, delta_x); 1719 brw_MAC(p, dst, suboffset(interp, 1), delta_y); 1720 } 1721} 1722 1723void 1724fs_visitor::generate_math(fs_inst *inst, 1725 struct brw_reg dst, struct brw_reg *src) 1726{ 1727 int op; 1728 1729 switch (inst->opcode) { 1730 case FS_OPCODE_RCP: 1731 op = BRW_MATH_FUNCTION_INV; 1732 break; 1733 case FS_OPCODE_RSQ: 1734 op = BRW_MATH_FUNCTION_RSQ; 1735 break; 1736 case FS_OPCODE_SQRT: 1737 op = BRW_MATH_FUNCTION_SQRT; 1738 break; 1739 case FS_OPCODE_EXP2: 1740 op = BRW_MATH_FUNCTION_EXP; 1741 break; 1742 case FS_OPCODE_LOG2: 1743 op = 
BRW_MATH_FUNCTION_LOG; 1744 break; 1745 case FS_OPCODE_POW: 1746 op = BRW_MATH_FUNCTION_POW; 1747 break; 1748 case FS_OPCODE_SIN: 1749 op = BRW_MATH_FUNCTION_SIN; 1750 break; 1751 case FS_OPCODE_COS: 1752 op = BRW_MATH_FUNCTION_COS; 1753 break; 1754 default: 1755 assert(!"not reached: unknown math function"); 1756 op = 0; 1757 break; 1758 } 1759 1760 assert(inst->mlen >= 1); 1761 1762 if (inst->opcode == FS_OPCODE_POW) { 1763 brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src[1]); 1764 } 1765 1766 brw_math(p, dst, 1767 op, 1768 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 1769 BRW_MATH_SATURATE_NONE, 1770 inst->base_mrf, src[0], 1771 BRW_MATH_DATA_VECTOR, 1772 BRW_MATH_PRECISION_FULL); 1773} 1774 1775void 1776fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) 1777{ 1778 int msg_type = -1; 1779 int rlen = 4; 1780 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; 1781 1782 if (intel->gen >= 5) { 1783 switch (inst->opcode) { 1784 case FS_OPCODE_TEX: 1785 if (inst->shadow_compare) { 1786 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; 1787 } else { 1788 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; 1789 } 1790 break; 1791 case FS_OPCODE_TXB: 1792 if (inst->shadow_compare) { 1793 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; 1794 } else { 1795 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; 1796 } 1797 break; 1798 } 1799 } else { 1800 switch (inst->opcode) { 1801 case FS_OPCODE_TEX: 1802 /* Note that G45 and older determines shadow compare and dispatch width 1803 * from message length for most messages. 1804 */ 1805 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 1806 if (inst->shadow_compare) { 1807 assert(inst->mlen == 5); 1808 } else { 1809 assert(inst->mlen <= 6); 1810 } 1811 break; 1812 case FS_OPCODE_TXB: 1813 if (inst->shadow_compare) { 1814 assert(inst->mlen == 5); 1815 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 1816 } else { 1817 assert(inst->mlen == 8); 1818 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1819 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; 1820 } 1821 break; 1822 } 1823 } 1824 assert(msg_type != -1); 1825 1826 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { 1827 rlen = 8; 1828 dst = vec16(dst); 1829 } 1830 1831 brw_SAMPLE(p, 1832 retype(dst, BRW_REGISTER_TYPE_UW), 1833 inst->base_mrf, 1834 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 1835 SURF_INDEX_TEXTURE(inst->sampler), 1836 inst->sampler, 1837 WRITEMASK_XYZW, 1838 msg_type, 1839 rlen, 1840 inst->mlen, 1841 0, 1842 1, 1843 simd_mode); 1844} 1845 1846 1847/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 1848 * looking like: 1849 * 1850 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 1851 * 1852 * and we're trying to produce: 1853 * 1854 * DDX DDY 1855 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 1856 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 1857 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 1858 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 1859 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 1860 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 1861 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 1862 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 1863 * 1864 * and add another set of two more subspans if in 16-pixel dispatch mode. 1865 * 1866 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 1867 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 1868 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 1869 * between each other. 
We could probably do it like ddx and swizzle the right 1870 * order later, but bail for now and just produce 1871 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) 1872 */ 1873void 1874fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1875{ 1876 struct brw_reg src0 = brw_reg(src.file, src.nr, 1, 1877 BRW_REGISTER_TYPE_F, 1878 BRW_VERTICAL_STRIDE_2, 1879 BRW_WIDTH_2, 1880 BRW_HORIZONTAL_STRIDE_0, 1881 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1882 struct brw_reg src1 = brw_reg(src.file, src.nr, 0, 1883 BRW_REGISTER_TYPE_F, 1884 BRW_VERTICAL_STRIDE_2, 1885 BRW_WIDTH_2, 1886 BRW_HORIZONTAL_STRIDE_0, 1887 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1888 brw_ADD(p, dst, src0, negate(src1)); 1889} 1890 1891void 1892fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1893{ 1894 struct brw_reg src0 = brw_reg(src.file, src.nr, 0, 1895 BRW_REGISTER_TYPE_F, 1896 BRW_VERTICAL_STRIDE_4, 1897 BRW_WIDTH_4, 1898 BRW_HORIZONTAL_STRIDE_0, 1899 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1900 struct brw_reg src1 = brw_reg(src.file, src.nr, 2, 1901 BRW_REGISTER_TYPE_F, 1902 BRW_VERTICAL_STRIDE_4, 1903 BRW_WIDTH_4, 1904 BRW_HORIZONTAL_STRIDE_0, 1905 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1906 brw_ADD(p, dst, src0, negate(src1)); 1907} 1908 1909void 1910fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask) 1911{ 1912 brw_push_insn_state(p); 1913 brw_set_mask_control(p, BRW_MASK_DISABLE); 1914 brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */ 1915 brw_pop_insn_state(p); 1916} 1917 1918void 1919fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask) 1920{ 1921 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); 1922 mask = brw_uw1_reg(mask.file, mask.nr, 0); 1923 1924 brw_push_insn_state(p); 1925 brw_set_mask_control(p, BRW_MASK_DISABLE); 1926 brw_AND(p, g0, mask, g0); 1927 brw_pop_insn_state(p); 1928} 1929 1930void 1931fs_visitor::assign_curb_setup() 1932{ 1933 c->prog_data.first_curbe_grf = c->key.nr_payload_regs; 1934 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 1935 1936 /* Map the offsets in the UNIFORM file to fixed HW regs. */ 1937 foreach_iter(exec_list_iterator, iter, this->instructions) { 1938 fs_inst *inst = (fs_inst *)iter.get(); 1939 1940 for (unsigned int i = 0; i < 3; i++) { 1941 if (inst->src[i].file == UNIFORM) { 1942 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 1943 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + 1944 constant_nr / 8, 1945 constant_nr % 8); 1946 1947 inst->src[i].file = FIXED_HW_REG; 1948 inst->src[i].fixed_hw_reg = brw_reg; 1949 } 1950 } 1951 } 1952} 1953 1954void 1955fs_visitor::calculate_urb_setup() 1956{ 1957 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 1958 urb_setup[i] = -1; 1959 } 1960 1961 int urb_next = 0; 1962 /* Figure out where each of the incoming setup attributes lands. */ 1963 if (intel->gen >= 6) { 1964 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 1965 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) { 1966 urb_setup[i] = urb_next++; 1967 } 1968 } 1969 } else { 1970 /* FINISHME: The sf doesn't map VS->FS inputs for us very well. 
*/ 1971 for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) { 1972 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { 1973 int fp_index; 1974 1975 if (i >= VERT_RESULT_VAR0) 1976 fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); 1977 else if (i <= VERT_RESULT_TEX7) 1978 fp_index = i; 1979 else 1980 fp_index = -1; 1981 1982 if (fp_index >= 0) 1983 urb_setup[fp_index] = urb_next++; 1984 } 1985 } 1986 } 1987 1988 /* Each attribute is 4 setup channels, each of which is half a reg. */ 1989 c->prog_data.urb_read_length = urb_next * 2; 1990} 1991 1992void 1993fs_visitor::assign_urb_setup() 1994{ 1995 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; 1996 1997 /* Offset all the urb_setup[] index by the actual position of the 1998 * setup regs, now that the location of the constants has been chosen. 1999 */ 2000 foreach_iter(exec_list_iterator, iter, this->instructions) { 2001 fs_inst *inst = (fs_inst *)iter.get(); 2002 2003 if (inst->opcode != FS_OPCODE_LINTERP) 2004 continue; 2005 2006 assert(inst->src[2].file == FIXED_HW_REG); 2007 2008 inst->src[2].fixed_hw_reg.nr += urb_start; 2009 } 2010 2011 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 2012} 2013 2014static void 2015assign_reg(int *reg_hw_locations, fs_reg *reg) 2016{ 2017 if (reg->file == GRF && reg->reg != 0) { 2018 reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset; 2019 reg->reg = 0; 2020 } 2021} 2022 2023void 2024fs_visitor::assign_regs_trivial() 2025{ 2026 int last_grf = 0; 2027 int hw_reg_mapping[this->virtual_grf_next]; 2028 int i; 2029 2030 hw_reg_mapping[0] = 0; 2031 hw_reg_mapping[1] = this->first_non_payload_grf; 2032 for (i = 2; i < this->virtual_grf_next; i++) { 2033 hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + 2034 this->virtual_grf_sizes[i - 1]); 2035 } 2036 last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1]; 2037 2038 foreach_iter(exec_list_iterator, iter, this->instructions) { 2039 fs_inst *inst = (fs_inst *)iter.get(); 2040 2041 assign_reg(hw_reg_mapping, &inst->dst); 2042 assign_reg(hw_reg_mapping, &inst->src[0]); 2043 assign_reg(hw_reg_mapping, &inst->src[1]); 2044 } 2045 2046 this->grf_used = last_grf + 1; 2047} 2048 2049void 2050fs_visitor::assign_regs() 2051{ 2052 int last_grf = 0; 2053 int hw_reg_mapping[this->virtual_grf_next + 1]; 2054 int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf; 2055 int class_sizes[base_reg_count]; 2056 int class_count = 0; 2057 int aligned_pair_class = -1; 2058 2059 /* Set up the register classes. 2060 * 2061 * The base registers store a scalar value. For texture samples, 2062 * we get virtual GRFs composed of 4 contiguous hw register. For 2063 * structures and arrays, we store them as contiguous larger things 2064 * than that, though we should be able to do better most of the 2065 * time. 2066 */ 2067 class_sizes[class_count++] = 1; 2068 if (brw->has_pln && intel->gen < 6) { 2069 /* Always set up the (unaligned) pairs for gen5, so we can find 2070 * them for making the aligned pair class. 
2071 */ 2072 class_sizes[class_count++] = 2; 2073 } 2074 for (int r = 1; r < this->virtual_grf_next; r++) { 2075 int i; 2076 2077 for (i = 0; i < class_count; i++) { 2078 if (class_sizes[i] == this->virtual_grf_sizes[r]) 2079 break; 2080 } 2081 if (i == class_count) { 2082 if (this->virtual_grf_sizes[r] >= base_reg_count) { 2083 fprintf(stderr, "Object too large to register allocate.\n"); 2084 this->fail = true; 2085 } 2086 2087 class_sizes[class_count++] = this->virtual_grf_sizes[r]; 2088 } 2089 } 2090 2091 int ra_reg_count = 0; 2092 int class_base_reg[class_count]; 2093 int class_reg_count[class_count]; 2094 int classes[class_count + 1]; 2095 2096 for (int i = 0; i < class_count; i++) { 2097 class_base_reg[i] = ra_reg_count; 2098 class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); 2099 ra_reg_count += class_reg_count[i]; 2100 } 2101 2102 struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); 2103 for (int i = 0; i < class_count; i++) { 2104 classes[i] = ra_alloc_reg_class(regs); 2105 2106 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { 2107 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); 2108 } 2109 2110 /* Add conflicts between our contiguous registers aliasing 2111 * base regs and other register classes' contiguous registers 2112 * that alias base regs, or the base regs themselves for classes[0]. 2113 */ 2114 for (int c = 0; c <= i; c++) { 2115 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { 2116 for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); 2117 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]); 2118 c_r++) { 2119 2120 if (0) { 2121 printf("%d/%d conflicts %d/%d\n", 2122 class_sizes[i], this->first_non_payload_grf + i_r, 2123 class_sizes[c], this->first_non_payload_grf + c_r); 2124 } 2125 2126 ra_add_reg_conflict(regs, 2127 class_base_reg[i] + i_r, 2128 class_base_reg[c] + c_r); 2129 } 2130 } 2131 } 2132 } 2133 2134 /* Add a special class for aligned pairs, which we'll put delta_x/y 2135 * in on gen5 so that we can do PLN. 2136 */ 2137 if (brw->has_pln && intel->gen < 6) { 2138 int reg_count = (base_reg_count - 1) / 2; 2139 int unaligned_pair_class = 1; 2140 assert(class_sizes[unaligned_pair_class] == 2); 2141 2142 aligned_pair_class = class_count; 2143 classes[aligned_pair_class] = ra_alloc_reg_class(regs); 2144 class_base_reg[aligned_pair_class] = 0; 2145 class_reg_count[aligned_pair_class] = 0; 2146 int start = (this->first_non_payload_grf & 1) ? 1 : 0; 2147 2148 for (int i = 0; i < reg_count; i++) { 2149 ra_class_add_reg(regs, classes[aligned_pair_class], 2150 class_base_reg[unaligned_pair_class] + i * 2 + start); 2151 } 2152 class_count++; 2153 } 2154 2155 ra_set_finalize(regs); 2156 2157 struct ra_graph *g = ra_alloc_interference_graph(regs, 2158 this->virtual_grf_next); 2159 /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 2160 * with nodes. 
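    * (fs_reg::reg == 0 means "not a virtual GRF"; assign_reg() and the
    * live-interval code both ignore it, so node 0 never picks up real
    * interferences and whatever color it gets is unused.)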
2161 */ 2162 ra_set_node_class(g, 0, classes[0]); 2163 2164 for (int i = 1; i < this->virtual_grf_next; i++) { 2165 for (int c = 0; c < class_count; c++) { 2166 if (class_sizes[c] == this->virtual_grf_sizes[i]) { 2167 if (aligned_pair_class >= 0 && 2168 this->delta_x.reg == i) { 2169 ra_set_node_class(g, i, classes[aligned_pair_class]); 2170 } else { 2171 ra_set_node_class(g, i, classes[c]); 2172 } 2173 break; 2174 } 2175 } 2176 2177 for (int j = 1; j < i; j++) { 2178 if (virtual_grf_interferes(i, j)) { 2179 ra_add_node_interference(g, i, j); 2180 } 2181 } 2182 } 2183 2184 /* FINISHME: Handle spilling */ 2185 if (!ra_allocate_no_spills(g)) { 2186 fprintf(stderr, "Failed to allocate registers.\n"); 2187 this->fail = true; 2188 return; 2189 } 2190 2191 /* Get the chosen virtual registers for each node, and map virtual 2192 * regs in the register classes back down to real hardware reg 2193 * numbers. 2194 */ 2195 hw_reg_mapping[0] = 0; /* unused */ 2196 for (int i = 1; i < this->virtual_grf_next; i++) { 2197 int reg = ra_get_node_reg(g, i); 2198 int hw_reg = -1; 2199 2200 for (int c = 0; c < class_count; c++) { 2201 if (reg >= class_base_reg[c] && 2202 reg < class_base_reg[c] + class_reg_count[c]) { 2203 hw_reg = reg - class_base_reg[c]; 2204 break; 2205 } 2206 } 2207 2208 assert(hw_reg != -1); 2209 hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg; 2210 last_grf = MAX2(last_grf, 2211 hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); 2212 } 2213 2214 foreach_iter(exec_list_iterator, iter, this->instructions) { 2215 fs_inst *inst = (fs_inst *)iter.get(); 2216 2217 assign_reg(hw_reg_mapping, &inst->dst); 2218 assign_reg(hw_reg_mapping, &inst->src[0]); 2219 assign_reg(hw_reg_mapping, &inst->src[1]); 2220 } 2221 2222 this->grf_used = last_grf + 1; 2223 2224 talloc_free(g); 2225 talloc_free(regs); 2226} 2227 2228void 2229fs_visitor::calculate_live_intervals() 2230{ 2231 int num_vars = this->virtual_grf_next; 2232 int *def = talloc_array(mem_ctx, int, num_vars); 2233 int *use = talloc_array(mem_ctx, int, num_vars); 2234 int loop_depth = 0; 2235 int loop_start = 0; 2236 2237 for (int i = 0; i < num_vars; i++) { 2238 def[i] = 1 << 30; 2239 use[i] = -1; 2240 } 2241 2242 int ip = 0; 2243 foreach_iter(exec_list_iterator, iter, this->instructions) { 2244 fs_inst *inst = (fs_inst *)iter.get(); 2245 2246 if (inst->opcode == BRW_OPCODE_DO) { 2247 if (loop_depth++ == 0) 2248 loop_start = ip; 2249 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2250 loop_depth--; 2251 2252 if (loop_depth == 0) { 2253 /* FINISHME: 2254 * 2255 * Patches up any vars marked for use within the loop as 2256 * live until the end. This is conservative, as there 2257 * will often be variables defined and used inside the 2258 * loop but dead at the end of the loop body. 
2259 */ 2260 for (int i = 0; i < num_vars; i++) { 2261 if (use[i] == loop_start) { 2262 use[i] = ip; 2263 } 2264 } 2265 } 2266 } else { 2267 int eip = ip; 2268 2269 if (loop_depth) 2270 eip = loop_start; 2271 2272 for (unsigned int i = 0; i < 3; i++) { 2273 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2274 use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip); 2275 } 2276 } 2277 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2278 def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip); 2279 } 2280 } 2281 2282 ip++; 2283 } 2284 2285 talloc_free(this->virtual_grf_def); 2286 talloc_free(this->virtual_grf_use); 2287 this->virtual_grf_def = def; 2288 this->virtual_grf_use = use; 2289} 2290 2291/** 2292 * Attempts to move immediate constants into the immediate 2293 * constant slot of following instructions. 2294 * 2295 * Immediate constants are a bit tricky -- they have to be in the last 2296 * operand slot, you can't do abs/negate on them, 2297 */ 2298 2299bool 2300fs_visitor::propagate_constants() 2301{ 2302 bool progress = false; 2303 2304 foreach_iter(exec_list_iterator, iter, this->instructions) { 2305 fs_inst *inst = (fs_inst *)iter.get(); 2306 2307 if (inst->opcode != BRW_OPCODE_MOV || 2308 inst->predicated || 2309 inst->dst.file != GRF || inst->src[0].file != IMM || 2310 inst->dst.type != inst->src[0].type) 2311 continue; 2312 2313 /* Don't bother with cases where we should have had the 2314 * operation on the constant folded in GLSL already. 2315 */ 2316 if (inst->saturate) 2317 continue; 2318 2319 /* Found a move of a constant to a GRF. Find anything else using the GRF 2320 * before it's written, and replace it with the constant if we can. 2321 */ 2322 exec_list_iterator scan_iter = iter; 2323 scan_iter.next(); 2324 for (; scan_iter.has_next(); scan_iter.next()) { 2325 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2326 2327 if (scan_inst->opcode == BRW_OPCODE_DO || 2328 scan_inst->opcode == BRW_OPCODE_WHILE || 2329 scan_inst->opcode == BRW_OPCODE_ELSE || 2330 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2331 break; 2332 } 2333 2334 for (int i = 2; i >= 0; i--) { 2335 if (scan_inst->src[i].file != GRF || 2336 scan_inst->src[i].reg != inst->dst.reg || 2337 scan_inst->src[i].reg_offset != inst->dst.reg_offset) 2338 continue; 2339 2340 /* Don't bother with cases where we should have had the 2341 * operation on the constant folded in GLSL already. 
2342 */ 2343 if (scan_inst->src[i].negate || scan_inst->src[i].abs) 2344 continue; 2345 2346 switch (scan_inst->opcode) { 2347 case BRW_OPCODE_MOV: 2348 scan_inst->src[i] = inst->src[0]; 2349 progress = true; 2350 break; 2351 2352 case BRW_OPCODE_MUL: 2353 case BRW_OPCODE_ADD: 2354 if (i == 1) { 2355 scan_inst->src[i] = inst->src[0]; 2356 progress = true; 2357 } else if (i == 0 && scan_inst->src[1].file != IMM) { 2358 /* Fit this constant in by commuting the operands */ 2359 scan_inst->src[0] = scan_inst->src[1]; 2360 scan_inst->src[1] = inst->src[0]; 2361 } 2362 break; 2363 case BRW_OPCODE_CMP: 2364 if (i == 1) { 2365 scan_inst->src[i] = inst->src[0]; 2366 progress = true; 2367 } 2368 } 2369 } 2370 2371 if (scan_inst->dst.file == GRF && 2372 scan_inst->dst.reg == inst->dst.reg && 2373 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2374 scan_inst->opcode == FS_OPCODE_TEX)) { 2375 break; 2376 } 2377 } 2378 } 2379 2380 return progress; 2381} 2382/** 2383 * Must be called after calculate_live_intervales() to remove unused 2384 * writes to registers -- register allocation will fail otherwise 2385 * because something deffed but not used won't be considered to 2386 * interfere with other regs. 2387 */ 2388bool 2389fs_visitor::dead_code_eliminate() 2390{ 2391 bool progress = false; 2392 int num_vars = this->virtual_grf_next; 2393 bool dead[num_vars]; 2394 2395 for (int i = 0; i < num_vars; i++) { 2396 dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i]; 2397 2398 if (dead[i]) { 2399 /* Mark off its interval so it won't interfere with anything. */ 2400 this->virtual_grf_def[i] = -1; 2401 this->virtual_grf_use[i] = -1; 2402 } 2403 } 2404 2405 foreach_iter(exec_list_iterator, iter, this->instructions) { 2406 fs_inst *inst = (fs_inst *)iter.get(); 2407 2408 if (inst->dst.file == GRF && dead[inst->dst.reg]) { 2409 inst->remove(); 2410 progress = true; 2411 } 2412 } 2413 2414 return progress; 2415} 2416 2417bool 2418fs_visitor::register_coalesce() 2419{ 2420 bool progress = false; 2421 2422 foreach_iter(exec_list_iterator, iter, this->instructions) { 2423 fs_inst *inst = (fs_inst *)iter.get(); 2424 2425 if (inst->opcode != BRW_OPCODE_MOV || 2426 inst->predicated || 2427 inst->saturate || 2428 inst->dst.file != GRF || inst->src[0].file != GRF || 2429 inst->dst.type != inst->src[0].type) 2430 continue; 2431 2432 /* Found a move of a GRF to a GRF. Let's see if we can coalesce 2433 * them: check for no writes to either one until the exit of the 2434 * program. 2435 */ 2436 bool interfered = false; 2437 exec_list_iterator scan_iter = iter; 2438 scan_iter.next(); 2439 for (; scan_iter.has_next(); scan_iter.next()) { 2440 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2441 2442 if (scan_inst->opcode == BRW_OPCODE_DO || 2443 scan_inst->opcode == BRW_OPCODE_WHILE || 2444 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2445 interfered = true; 2446 iter = scan_iter; 2447 break; 2448 } 2449 2450 if (scan_inst->dst.file == GRF) { 2451 if (scan_inst->dst.reg == inst->dst.reg && 2452 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2453 scan_inst->opcode == FS_OPCODE_TEX)) { 2454 interfered = true; 2455 break; 2456 } 2457 if (scan_inst->dst.reg == inst->src[0].reg && 2458 (scan_inst->dst.reg_offset == inst->src[0].reg_offset || 2459 scan_inst->opcode == FS_OPCODE_TEX)) { 2460 interfered = true; 2461 break; 2462 } 2463 } 2464 } 2465 if (interfered) { 2466 continue; 2467 } 2468 2469 /* Rewrite the later usage to point at the source of the move to 2470 * be removed. 
2471 */ 2472 for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); 2473 scan_iter.next()) { 2474 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2475 2476 for (int i = 0; i < 3; i++) { 2477 if (scan_inst->src[i].file == GRF && 2478 scan_inst->src[i].reg == inst->dst.reg && 2479 scan_inst->src[i].reg_offset == inst->dst.reg_offset) { 2480 scan_inst->src[i].reg = inst->src[0].reg; 2481 scan_inst->src[i].reg_offset = inst->src[0].reg_offset; 2482 scan_inst->src[i].abs |= inst->src[0].abs; 2483 scan_inst->src[i].negate ^= inst->src[0].negate; 2484 } 2485 } 2486 } 2487 2488 inst->remove(); 2489 progress = true; 2490 } 2491 2492 return progress; 2493} 2494 2495 2496bool 2497fs_visitor::compute_to_mrf() 2498{ 2499 bool progress = false; 2500 int next_ip = 0; 2501 2502 foreach_iter(exec_list_iterator, iter, this->instructions) { 2503 fs_inst *inst = (fs_inst *)iter.get(); 2504 2505 int ip = next_ip; 2506 next_ip++; 2507 2508 if (inst->opcode != BRW_OPCODE_MOV || 2509 inst->predicated || 2510 inst->dst.file != MRF || inst->src[0].file != GRF || 2511 inst->dst.type != inst->src[0].type || 2512 inst->src[0].abs || inst->src[0].negate) 2513 continue; 2514 2515 /* Can't compute-to-MRF this GRF if someone else was going to 2516 * read it later. 2517 */ 2518 if (this->virtual_grf_use[inst->src[0].reg] > ip) 2519 continue; 2520 2521 /* Found a move of a GRF to a MRF. Let's see if we can go 2522 * rewrite the thing that made this GRF to write into the MRF. 2523 */ 2524 bool found = false; 2525 fs_inst *scan_inst; 2526 for (scan_inst = (fs_inst *)inst->prev; 2527 scan_inst->prev != NULL; 2528 scan_inst = (fs_inst *)scan_inst->prev) { 2529 /* We don't handle flow control here. Most computation of 2530 * values that end up in MRFs are shortly before the MRF 2531 * write anyway. 2532 */ 2533 if (scan_inst->opcode == BRW_OPCODE_DO || 2534 scan_inst->opcode == BRW_OPCODE_WHILE || 2535 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2536 break; 2537 } 2538 2539 /* You can't read from an MRF, so if someone else reads our 2540 * MRF's source GRF that we wanted to rewrite, that stops us. 2541 */ 2542 bool interfered = false; 2543 for (int i = 0; i < 3; i++) { 2544 if (scan_inst->src[i].file == GRF && 2545 scan_inst->src[i].reg == inst->src[0].reg && 2546 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) { 2547 interfered = true; 2548 } 2549 } 2550 if (interfered) 2551 break; 2552 2553 if (scan_inst->dst.file == MRF && 2554 scan_inst->dst.hw_reg == inst->dst.hw_reg) { 2555 /* Somebody else wrote our MRF here, so we can't can't 2556 * compute-to-MRF before that. 2557 */ 2558 break; 2559 } 2560 2561 if (scan_inst->mlen > 0) { 2562 /* Found a SEND instruction, which will do some amount of 2563 * implied write that may overwrite our MRF that we were 2564 * hoping to compute-to-MRF somewhere above it. Nothing 2565 * we have implied-writes more than 2 MRFs from base_mrf, 2566 * though. 2567 */ 2568 int implied_write_len = MIN2(scan_inst->mlen, 2); 2569 if (inst->dst.hw_reg >= scan_inst->base_mrf && 2570 inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) { 2571 break; 2572 } 2573 } 2574 2575 if (scan_inst->dst.file == GRF && 2576 scan_inst->dst.reg == inst->src[0].reg) { 2577 /* Found the last thing to write our reg we want to turn 2578 * into a compute-to-MRF. 2579 */ 2580 2581 if (scan_inst->opcode == FS_OPCODE_TEX) { 2582 /* texturing writes several continuous regs, so we can't 2583 * compute-to-mrf that. 
2584 */ 2585 break; 2586 } 2587 2588 /* If it's predicated, it (probably) didn't populate all 2589 * the channels. 2590 */ 2591 if (scan_inst->predicated) 2592 break; 2593 2594 /* SEND instructions can't have MRF as a destination. */ 2595 if (scan_inst->mlen) 2596 break; 2597 2598 if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { 2599 /* Found the creator of our MRF's source value. */ 2600 found = true; 2601 break; 2602 } 2603 } 2604 } 2605 if (found) { 2606 scan_inst->dst.file = MRF; 2607 scan_inst->dst.hw_reg = inst->dst.hw_reg; 2608 scan_inst->saturate |= inst->saturate; 2609 inst->remove(); 2610 progress = true; 2611 } 2612 } 2613 2614 return progress; 2615} 2616 2617bool 2618fs_visitor::virtual_grf_interferes(int a, int b) 2619{ 2620 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); 2621 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); 2622 2623 /* For dead code, just check if the def interferes with the other range. */ 2624 if (this->virtual_grf_use[a] == -1) { 2625 return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] && 2626 this->virtual_grf_def[a] < this->virtual_grf_use[b]); 2627 } 2628 if (this->virtual_grf_use[b] == -1) { 2629 return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] && 2630 this->virtual_grf_def[b] < this->virtual_grf_use[a]); 2631 } 2632 2633 return start < end; 2634} 2635 2636static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) 2637{ 2638 struct brw_reg brw_reg; 2639 2640 switch (reg->file) { 2641 case GRF: 2642 case ARF: 2643 case MRF: 2644 brw_reg = brw_vec8_reg(reg->file, 2645 reg->hw_reg, 0); 2646 brw_reg = retype(brw_reg, reg->type); 2647 break; 2648 case IMM: 2649 switch (reg->type) { 2650 case BRW_REGISTER_TYPE_F: 2651 brw_reg = brw_imm_f(reg->imm.f); 2652 break; 2653 case BRW_REGISTER_TYPE_D: 2654 brw_reg = brw_imm_d(reg->imm.i); 2655 break; 2656 case BRW_REGISTER_TYPE_UD: 2657 brw_reg = brw_imm_ud(reg->imm.u); 2658 break; 2659 default: 2660 assert(!"not reached"); 2661 break; 2662 } 2663 break; 2664 case FIXED_HW_REG: 2665 brw_reg = reg->fixed_hw_reg; 2666 break; 2667 case BAD_FILE: 2668 /* Probably unused. 
*/ 2669 brw_reg = brw_null_reg(); 2670 break; 2671 case UNIFORM: 2672 assert(!"not reached"); 2673 brw_reg = brw_null_reg(); 2674 break; 2675 } 2676 if (reg->abs) 2677 brw_reg = brw_abs(brw_reg); 2678 if (reg->negate) 2679 brw_reg = negate(brw_reg); 2680 2681 return brw_reg; 2682} 2683 2684void 2685fs_visitor::generate_code() 2686{ 2687 unsigned int annotation_len = 0; 2688 int last_native_inst = 0; 2689 struct brw_instruction *if_stack[16], *loop_stack[16]; 2690 int if_stack_depth = 0, loop_stack_depth = 0; 2691 int if_depth_in_loop[16]; 2692 2693 if_depth_in_loop[loop_stack_depth] = 0; 2694 2695 memset(&if_stack, 0, sizeof(if_stack)); 2696 foreach_iter(exec_list_iterator, iter, this->instructions) { 2697 fs_inst *inst = (fs_inst *)iter.get(); 2698 struct brw_reg src[3], dst; 2699 2700 for (unsigned int i = 0; i < 3; i++) { 2701 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 2702 } 2703 dst = brw_reg_from_fs_reg(&inst->dst); 2704 2705 brw_set_conditionalmod(p, inst->conditional_mod); 2706 brw_set_predicate_control(p, inst->predicated); 2707 2708 switch (inst->opcode) { 2709 case BRW_OPCODE_MOV: 2710 brw_MOV(p, dst, src[0]); 2711 break; 2712 case BRW_OPCODE_ADD: 2713 brw_ADD(p, dst, src[0], src[1]); 2714 break; 2715 case BRW_OPCODE_MUL: 2716 brw_MUL(p, dst, src[0], src[1]); 2717 break; 2718 2719 case BRW_OPCODE_FRC: 2720 brw_FRC(p, dst, src[0]); 2721 break; 2722 case BRW_OPCODE_RNDD: 2723 brw_RNDD(p, dst, src[0]); 2724 break; 2725 case BRW_OPCODE_RNDZ: 2726 brw_RNDZ(p, dst, src[0]); 2727 break; 2728 2729 case BRW_OPCODE_AND: 2730 brw_AND(p, dst, src[0], src[1]); 2731 break; 2732 case BRW_OPCODE_OR: 2733 brw_OR(p, dst, src[0], src[1]); 2734 break; 2735 case BRW_OPCODE_XOR: 2736 brw_XOR(p, dst, src[0], src[1]); 2737 break; 2738 case BRW_OPCODE_NOT: 2739 brw_NOT(p, dst, src[0]); 2740 break; 2741 case BRW_OPCODE_ASR: 2742 brw_ASR(p, dst, src[0], src[1]); 2743 break; 2744 case BRW_OPCODE_SHR: 2745 brw_SHR(p, dst, src[0], src[1]); 2746 break; 2747 case BRW_OPCODE_SHL: 2748 brw_SHL(p, dst, src[0], src[1]); 2749 break; 2750 2751 case BRW_OPCODE_CMP: 2752 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 2753 break; 2754 case BRW_OPCODE_SEL: 2755 brw_SEL(p, dst, src[0], src[1]); 2756 break; 2757 2758 case BRW_OPCODE_IF: 2759 assert(if_stack_depth < 16); 2760 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 2761 if_depth_in_loop[loop_stack_depth]++; 2762 if_stack_depth++; 2763 break; 2764 case BRW_OPCODE_ELSE: 2765 if_stack[if_stack_depth - 1] = 2766 brw_ELSE(p, if_stack[if_stack_depth - 1]); 2767 break; 2768 case BRW_OPCODE_ENDIF: 2769 if_stack_depth--; 2770 brw_ENDIF(p , if_stack[if_stack_depth]); 2771 if_depth_in_loop[loop_stack_depth]--; 2772 break; 2773 2774 case BRW_OPCODE_DO: 2775 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 2776 if_depth_in_loop[loop_stack_depth] = 0; 2777 break; 2778 2779 case BRW_OPCODE_BREAK: 2780 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 2781 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2782 break; 2783 case BRW_OPCODE_CONTINUE: 2784 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 2785 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2786 break; 2787 2788 case BRW_OPCODE_WHILE: { 2789 struct brw_instruction *inst0, *inst1; 2790 GLuint br = 1; 2791 2792 if (intel->gen >= 5) 2793 br = 2; 2794 2795 assert(loop_stack_depth > 0); 2796 loop_stack_depth--; 2797 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 2798 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 2799 while (inst0 > loop_stack[loop_stack_depth]) { 2800 
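         /* A jump_count of zero means this BREAK/CONT hasn't been patched
          * yet, so it belongs to this loop (inner loops already filled in
          * theirs).  BREAK jumps past the WHILE (hence the extra +1),
          * while CONT jumps to the WHILE itself.
          */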
inst0--; 2801 if (inst0->header.opcode == BRW_OPCODE_BREAK && 2802 inst0->bits3.if_else.jump_count == 0) { 2803 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 2804 } 2805 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 2806 inst0->bits3.if_else.jump_count == 0) { 2807 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 2808 } 2809 } 2810 } 2811 break; 2812 2813 case FS_OPCODE_RCP: 2814 case FS_OPCODE_RSQ: 2815 case FS_OPCODE_SQRT: 2816 case FS_OPCODE_EXP2: 2817 case FS_OPCODE_LOG2: 2818 case FS_OPCODE_POW: 2819 case FS_OPCODE_SIN: 2820 case FS_OPCODE_COS: 2821 generate_math(inst, dst, src); 2822 break; 2823 case FS_OPCODE_LINTERP: 2824 generate_linterp(inst, dst, src); 2825 break; 2826 case FS_OPCODE_TEX: 2827 case FS_OPCODE_TXB: 2828 case FS_OPCODE_TXL: 2829 generate_tex(inst, dst); 2830 break; 2831 case FS_OPCODE_DISCARD_NOT: 2832 generate_discard_not(inst, dst); 2833 break; 2834 case FS_OPCODE_DISCARD_AND: 2835 generate_discard_and(inst, src[0]); 2836 break; 2837 case FS_OPCODE_DDX: 2838 generate_ddx(inst, dst, src[0]); 2839 break; 2840 case FS_OPCODE_DDY: 2841 generate_ddy(inst, dst, src[0]); 2842 break; 2843 case FS_OPCODE_FB_WRITE: 2844 generate_fb_write(inst); 2845 break; 2846 default: 2847 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 2848 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 2849 brw_opcodes[inst->opcode].name); 2850 } else { 2851 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 2852 } 2853 this->fail = true; 2854 } 2855 2856 if (annotation_len < p->nr_insn) { 2857 annotation_len *= 2; 2858 if (annotation_len < 16) 2859 annotation_len = 16; 2860 2861 this->annotation_string = talloc_realloc(this->mem_ctx, 2862 annotation_string, 2863 const char *, 2864 annotation_len); 2865 this->annotation_ir = talloc_realloc(this->mem_ctx, 2866 annotation_ir, 2867 ir_instruction *, 2868 annotation_len); 2869 } 2870 2871 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 2872 this->annotation_string[i] = inst->annotation; 2873 this->annotation_ir[i] = inst->ir; 2874 } 2875 last_native_inst = p->nr_insn; 2876 } 2877} 2878 2879GLboolean 2880brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 2881{ 2882 struct brw_compile *p = &c->func; 2883 struct intel_context *intel = &brw->intel; 2884 GLcontext *ctx = &intel->ctx; 2885 struct brw_shader *shader = NULL; 2886 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 2887 2888 if (!prog) 2889 return GL_FALSE; 2890 2891 if (!using_new_fs) 2892 return GL_FALSE; 2893 2894 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 2895 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 2896 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 2897 break; 2898 } 2899 } 2900 if (!shader) 2901 return GL_FALSE; 2902 2903 /* We always use 8-wide mode, at least for now. For one, flow 2904 * control only works in 8-wide. Also, when we're fragment shader 2905 * bound, we're almost always under register pressure as well, so 2906 * 8-wide would save us from the performance cliff of spilling 2907 * regs. 2908 */ 2909 c->dispatch_width = 8; 2910 2911 if (INTEL_DEBUG & DEBUG_WM) { 2912 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 2913 _mesa_print_ir(shader->ir, NULL); 2914 printf("\n"); 2915 } 2916 2917 /* Now the main event: Visit the shader IR and generate our FS IR for it. 
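    * The visitor emits fs_inst IR for main(), then the loop below runs
    * the optimization passes to a fixed point before registers are
    * assigned and native code is generated.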
2918 */ 2919 fs_visitor v(c, shader); 2920 2921 if (0) { 2922 v.emit_dummy_fs(); 2923 } else { 2924 v.calculate_urb_setup(); 2925 if (intel->gen < 6) 2926 v.emit_interpolation_setup_gen4(); 2927 else 2928 v.emit_interpolation_setup_gen6(); 2929 2930 /* Generate FS IR for main(). (the visitor only descends into 2931 * functions called "main"). 2932 */ 2933 foreach_iter(exec_list_iterator, iter, *shader->ir) { 2934 ir_instruction *ir = (ir_instruction *)iter.get(); 2935 v.base_ir = ir; 2936 ir->accept(&v); 2937 } 2938 2939 v.emit_fb_writes(); 2940 v.assign_curb_setup(); 2941 v.assign_urb_setup(); 2942 2943 bool progress; 2944 do { 2945 progress = false; 2946 2947 v.calculate_live_intervals(); 2948 progress = v.propagate_constants() || progress; 2949 progress = v.register_coalesce() || progress; 2950 progress = v.compute_to_mrf() || progress; 2951 progress = v.dead_code_eliminate() || progress; 2952 } while (progress); 2953 2954 if (0) 2955 v.assign_regs_trivial(); 2956 else 2957 v.assign_regs(); 2958 } 2959 2960 if (!v.fail) 2961 v.generate_code(); 2962 2963 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */ 2964 2965 if (v.fail) 2966 return GL_FALSE; 2967 2968 if (INTEL_DEBUG & DEBUG_WM) { 2969 const char *last_annotation_string = NULL; 2970 ir_instruction *last_annotation_ir = NULL; 2971 2972 printf("Native code for fragment shader %d:\n", prog->Name); 2973 for (unsigned int i = 0; i < p->nr_insn; i++) { 2974 if (last_annotation_ir != v.annotation_ir[i]) { 2975 last_annotation_ir = v.annotation_ir[i]; 2976 if (last_annotation_ir) { 2977 printf(" "); 2978 last_annotation_ir->print(); 2979 printf("\n"); 2980 } 2981 } 2982 if (last_annotation_string != v.annotation_string[i]) { 2983 last_annotation_string = v.annotation_string[i]; 2984 if (last_annotation_string) 2985 printf(" %s\n", last_annotation_string); 2986 } 2987 brw_disasm(stdout, &p->store[i], intel->gen); 2988 } 2989 printf("\n"); 2990 } 2991 2992 c->prog_data.total_grf = v.grf_used; 2993 c->prog_data.total_scratch = 0; 2994 2995 return GL_TRUE; 2996} 2997