brw_fs.cpp revision f157812bbbcf9caac1f84988e738fc9d1e051056
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44#include "talloc.h" 45} 46#include "brw_fs.h" 47#include "../glsl/glsl_types.h" 48#include "../glsl/ir_optimization.h" 49#include "../glsl/ir_print_visitor.h" 50 51static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 52 53struct gl_shader * 54brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) 55{ 56 struct brw_shader *shader; 57 58 shader = talloc_zero(NULL, struct brw_shader); 59 if (shader) { 60 shader->base.Type = type; 61 shader->base.Name = name; 62 _mesa_init_shader(ctx, &shader->base); 63 } 64 65 return &shader->base; 66} 67 68struct gl_shader_program * 69brw_new_shader_program(struct gl_context *ctx, GLuint name) 70{ 71 struct brw_shader_program *prog; 72 prog = talloc_zero(NULL, struct brw_shader_program); 73 if (prog) { 74 prog->base.Name = name; 75 _mesa_init_shader_program(ctx, &prog->base); 76 } 77 return &prog->base; 78} 79 80GLboolean 81brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) 82{ 83 if (!_mesa_ir_compile_shader(ctx, shader)) 84 return GL_FALSE; 85 86 return GL_TRUE; 87} 88 89GLboolean 90brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 91{ 92 struct intel_context *intel = intel_context(ctx); 93 94 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { 95 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; 96 97 if (shader->base.Type == GL_FRAGMENT_SHADER) { 98 void *mem_ctx = talloc_new(NULL); 99 bool progress; 100 101 if (shader->ir) 102 talloc_free(shader->ir); 103 
shader->ir = new(shader) exec_list; 104 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 105 106 do_mat_op_to_vec(shader->ir); 107 do_mod_to_fract(shader->ir); 108 do_div_to_mul_rcp(shader->ir); 109 do_sub_to_add_neg(shader->ir); 110 do_explog_to_explog2(shader->ir); 111 do_lower_texture_projection(shader->ir); 112 brw_do_cubemap_normalize(shader->ir); 113 114 do { 115 progress = false; 116 117 brw_do_channel_expressions(shader->ir); 118 brw_do_vector_splitting(shader->ir); 119 120 progress = do_lower_jumps(shader->ir, true, true, 121 true, /* main return */ 122 false, /* continue */ 123 false /* loops */ 124 ) || progress; 125 126 progress = do_common_optimization(shader->ir, true, 32) || progress; 127 128 progress = lower_noise(shader->ir) || progress; 129 progress = 130 lower_variable_index_to_cond_assign(shader->ir, 131 GL_TRUE, /* input */ 132 GL_TRUE, /* output */ 133 GL_TRUE, /* temp */ 134 GL_TRUE /* uniform */ 135 ) || progress; 136 if (intel->gen == 6) { 137 progress = do_if_to_cond_assign(shader->ir) || progress; 138 } 139 } while (progress); 140 141 validate_ir_tree(shader->ir); 142 143 reparent_ir(shader->ir, shader->ir); 144 talloc_free(mem_ctx); 145 } 146 } 147 148 if (!_mesa_ir_link_shader(ctx, prog)) 149 return GL_FALSE; 150 151 return GL_TRUE; 152} 153 154static int 155type_size(const struct glsl_type *type) 156{ 157 unsigned int size, i; 158 159 switch (type->base_type) { 160 case GLSL_TYPE_UINT: 161 case GLSL_TYPE_INT: 162 case GLSL_TYPE_FLOAT: 163 case GLSL_TYPE_BOOL: 164 return type->components(); 165 case GLSL_TYPE_ARRAY: 166 return type_size(type->fields.array) * type->length; 167 case GLSL_TYPE_STRUCT: 168 size = 0; 169 for (i = 0; i < type->length; i++) { 170 size += type_size(type->fields.structure[i].type); 171 } 172 return size; 173 case GLSL_TYPE_SAMPLER: 174 /* Samplers take up no register space, since they're baked in at 175 * link time. 
176 */ 177 return 0; 178 default: 179 assert(!"not reached"); 180 return 0; 181 } 182} 183 184static const fs_reg reg_undef; 185static const fs_reg reg_null(ARF, BRW_ARF_NULL); 186 187int 188fs_visitor::virtual_grf_alloc(int size) 189{ 190 if (virtual_grf_array_size <= virtual_grf_next) { 191 if (virtual_grf_array_size == 0) 192 virtual_grf_array_size = 16; 193 else 194 virtual_grf_array_size *= 2; 195 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, 196 int, virtual_grf_array_size); 197 198 /* This slot is always unused. */ 199 virtual_grf_sizes[0] = 0; 200 } 201 virtual_grf_sizes[virtual_grf_next] = size; 202 return virtual_grf_next++; 203} 204 205/** Fixed HW reg constructor. */ 206fs_reg::fs_reg(enum register_file file, int hw_reg) 207{ 208 init(); 209 this->file = file; 210 this->hw_reg = hw_reg; 211 this->type = BRW_REGISTER_TYPE_F; 212} 213 214int 215brw_type_for_base_type(const struct glsl_type *type) 216{ 217 switch (type->base_type) { 218 case GLSL_TYPE_FLOAT: 219 return BRW_REGISTER_TYPE_F; 220 case GLSL_TYPE_INT: 221 case GLSL_TYPE_BOOL: 222 return BRW_REGISTER_TYPE_D; 223 case GLSL_TYPE_UINT: 224 return BRW_REGISTER_TYPE_UD; 225 case GLSL_TYPE_ARRAY: 226 case GLSL_TYPE_STRUCT: 227 /* These should be overridden with the type of the member when 228 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 229 * way to trip up if we don't. 230 */ 231 return BRW_REGISTER_TYPE_UD; 232 default: 233 assert(!"not reached"); 234 return BRW_REGISTER_TYPE_F; 235 } 236} 237 238/** Automatic reg constructor. 
*/ 239fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 240{ 241 init(); 242 243 this->file = GRF; 244 this->reg = v->virtual_grf_alloc(type_size(type)); 245 this->reg_offset = 0; 246 this->type = brw_type_for_base_type(type); 247} 248 249fs_reg * 250fs_visitor::variable_storage(ir_variable *var) 251{ 252 return (fs_reg *)hash_table_find(this->variable_ht, var); 253} 254 255/* Our support for uniforms is piggy-backed on the struct 256 * gl_fragment_program, because that's where the values actually 257 * get stored, rather than in some global gl_shader_program uniform 258 * store. 259 */ 260int 261fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 262{ 263 unsigned int offset = 0; 264 float *vec_values; 265 266 if (type->is_matrix()) { 267 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 268 type->vector_elements, 269 1); 270 271 for (unsigned int i = 0; i < type->matrix_columns; i++) { 272 offset += setup_uniform_values(loc + offset, column); 273 } 274 275 return offset; 276 } 277 278 switch (type->base_type) { 279 case GLSL_TYPE_FLOAT: 280 case GLSL_TYPE_UINT: 281 case GLSL_TYPE_INT: 282 case GLSL_TYPE_BOOL: 283 vec_values = fp->Base.Parameters->ParameterValues[loc]; 284 for (unsigned int i = 0; i < type->vector_elements; i++) { 285 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 286 } 287 return 1; 288 289 case GLSL_TYPE_STRUCT: 290 for (unsigned int i = 0; i < type->length; i++) { 291 offset += setup_uniform_values(loc + offset, 292 type->fields.structure[i].type); 293 } 294 return offset; 295 296 case GLSL_TYPE_ARRAY: 297 for (unsigned int i = 0; i < type->length; i++) { 298 offset += setup_uniform_values(loc + offset, type->fields.array); 299 } 300 return offset; 301 302 case GLSL_TYPE_SAMPLER: 303 /* The sampler takes up a slot, but we don't use any values from it. 
*/ 304 return 1; 305 306 default: 307 assert(!"not reached"); 308 return 0; 309 } 310} 311 312 313/* Our support for builtin uniforms is even scarier than non-builtin. 314 * It sits on top of the PROG_STATE_VAR parameters that are 315 * automatically updated from GL context state. 316 */ 317void 318fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 319{ 320 const struct gl_builtin_uniform_desc *statevar = NULL; 321 322 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 323 statevar = &_mesa_builtin_uniform_desc[i]; 324 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 325 break; 326 } 327 328 if (!statevar->name) { 329 this->fail = true; 330 printf("Failed to find builtin uniform `%s'\n", ir->name); 331 return; 332 } 333 334 int array_count; 335 if (ir->type->is_array()) { 336 array_count = ir->type->length; 337 } else { 338 array_count = 1; 339 } 340 341 for (int a = 0; a < array_count; a++) { 342 for (unsigned int i = 0; i < statevar->num_elements; i++) { 343 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 344 int tokens[STATE_LENGTH]; 345 346 memcpy(tokens, element->tokens, sizeof(element->tokens)); 347 if (ir->type->is_array()) { 348 tokens[1] = a; 349 } 350 351 /* This state reference has already been setup by ir_to_mesa, 352 * but we'll get the same index back here. 353 */ 354 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 355 (gl_state_index *)tokens); 356 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 357 358 /* Add each of the unique swizzles of the element as a 359 * parameter. This'll end up matching the expected layout of 360 * the array/matrix/structure we're trying to fill in. 
361 */ 362 int last_swiz = -1; 363 for (unsigned int i = 0; i < 4; i++) { 364 int swiz = GET_SWZ(element->swizzle, i); 365 if (swiz == last_swiz) 366 break; 367 last_swiz = swiz; 368 369 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; 370 } 371 } 372 } 373} 374 375fs_reg * 376fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 377{ 378 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 379 fs_reg wpos = *reg; 380 fs_reg neg_y = this->pixel_y; 381 neg_y.negate = true; 382 383 /* gl_FragCoord.x */ 384 if (ir->pixel_center_integer) { 385 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 386 } else { 387 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 388 } 389 wpos.reg_offset++; 390 391 /* gl_FragCoord.y */ 392 if (ir->origin_upper_left && ir->pixel_center_integer) { 393 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 394 } else { 395 fs_reg pixel_y = this->pixel_y; 396 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 397 398 if (!ir->origin_upper_left) { 399 pixel_y.negate = true; 400 offset += c->key.drawable_height - 1.0; 401 } 402 403 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 404 } 405 wpos.reg_offset++; 406 407 /* gl_FragCoord.z */ 408 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 409 interp_reg(FRAG_ATTRIB_WPOS, 2))); 410 wpos.reg_offset++; 411 412 /* gl_FragCoord.w: Already set up in emit_interpolation */ 413 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 414 415 return reg; 416} 417 418fs_reg * 419fs_visitor::emit_general_interpolation(ir_variable *ir) 420{ 421 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 422 /* Interpolation is always in floating point regs. 
*/ 423 reg->type = BRW_REGISTER_TYPE_F; 424 fs_reg attr = *reg; 425 426 unsigned int array_elements; 427 const glsl_type *type; 428 429 if (ir->type->is_array()) { 430 array_elements = ir->type->length; 431 if (array_elements == 0) { 432 this->fail = true; 433 } 434 type = ir->type->fields.array; 435 } else { 436 array_elements = 1; 437 type = ir->type; 438 } 439 440 int location = ir->location; 441 for (unsigned int i = 0; i < array_elements; i++) { 442 for (unsigned int j = 0; j < type->matrix_columns; j++) { 443 if (urb_setup[location] == -1) { 444 /* If there's no incoming setup data for this slot, don't 445 * emit interpolation for it. 446 */ 447 attr.reg_offset += type->vector_elements; 448 location++; 449 continue; 450 } 451 452 for (unsigned int c = 0; c < type->vector_elements; c++) { 453 struct brw_reg interp = interp_reg(location, c); 454 emit(fs_inst(FS_OPCODE_LINTERP, 455 attr, 456 this->delta_x, 457 this->delta_y, 458 fs_reg(interp))); 459 attr.reg_offset++; 460 } 461 462 if (intel->gen < 6) { 463 attr.reg_offset -= type->vector_elements; 464 for (unsigned int c = 0; c < type->vector_elements; c++) { 465 emit(fs_inst(BRW_OPCODE_MUL, 466 attr, 467 attr, 468 this->pixel_w)); 469 attr.reg_offset++; 470 } 471 } 472 location++; 473 } 474 } 475 476 return reg; 477} 478 479fs_reg * 480fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) 481{ 482 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 483 484 /* The frontfacing comes in as a bit in the thread payload. 
*/ 485 if (intel->gen >= 6) { 486 emit(fs_inst(BRW_OPCODE_ASR, 487 *reg, 488 fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), 489 fs_reg(15))); 490 emit(fs_inst(BRW_OPCODE_NOT, 491 *reg, 492 *reg)); 493 emit(fs_inst(BRW_OPCODE_AND, 494 *reg, 495 *reg, 496 fs_reg(1))); 497 } else { 498 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 499 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 500 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 501 * us front face 502 */ 503 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 504 *reg, 505 fs_reg(r1_6ud), 506 fs_reg(1u << 31))); 507 inst->conditional_mod = BRW_CONDITIONAL_L; 508 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 509 } 510 511 return reg; 512} 513 514fs_inst * 515fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) 516{ 517 switch (opcode) { 518 case FS_OPCODE_RCP: 519 case FS_OPCODE_RSQ: 520 case FS_OPCODE_SQRT: 521 case FS_OPCODE_EXP2: 522 case FS_OPCODE_LOG2: 523 case FS_OPCODE_SIN: 524 case FS_OPCODE_COS: 525 break; 526 default: 527 assert(!"not reached: bad math opcode"); 528 return NULL; 529 } 530 531 /* Can't do hstride == 0 args to gen6 math, so expand it out. We 532 * might be able to do better by doing execsize = 1 math and then 533 * expanding that result out, but we would need to be careful with 534 * masking. 
535 */ 536 if (intel->gen >= 6 && src.file == UNIFORM) { 537 fs_reg expanded = fs_reg(this, glsl_type::float_type); 538 emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); 539 src = expanded; 540 } 541 542 fs_inst *inst = emit(fs_inst(opcode, dst, src)); 543 544 if (intel->gen < 6) { 545 inst->base_mrf = 2; 546 inst->mlen = 1; 547 } 548 549 return inst; 550} 551 552fs_inst * 553fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) 554{ 555 int base_mrf = 2; 556 fs_inst *inst; 557 558 assert(opcode == FS_OPCODE_POW); 559 560 if (intel->gen >= 6) { 561 /* Can't do hstride == 0 args to gen6 math, so expand it out. */ 562 if (src0.file == UNIFORM) { 563 fs_reg expanded = fs_reg(this, glsl_type::float_type); 564 emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); 565 src0 = expanded; 566 } 567 568 if (src1.file == UNIFORM) { 569 fs_reg expanded = fs_reg(this, glsl_type::float_type); 570 emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); 571 src1 = expanded; 572 } 573 574 inst = emit(fs_inst(opcode, dst, src0, src1)); 575 } else { 576 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1)); 577 inst = emit(fs_inst(opcode, dst, src0, reg_null)); 578 579 inst->base_mrf = base_mrf; 580 inst->mlen = 2; 581 } 582 return inst; 583} 584 585void 586fs_visitor::visit(ir_variable *ir) 587{ 588 fs_reg *reg = NULL; 589 590 if (variable_storage(ir)) 591 return; 592 593 if (strcmp(ir->name, "gl_FragColor") == 0) { 594 this->frag_color = ir; 595 } else if (strcmp(ir->name, "gl_FragData") == 0) { 596 this->frag_data = ir; 597 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 598 this->frag_depth = ir; 599 } 600 601 if (ir->mode == ir_var_in) { 602 if (!strcmp(ir->name, "gl_FragCoord")) { 603 reg = emit_fragcoord_interpolation(ir); 604 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 605 reg = emit_frontfacing_interpolation(ir); 606 } else { 607 reg = emit_general_interpolation(ir); 608 } 609 assert(reg); 610 hash_table_insert(this->variable_ht, reg, ir); 611 return; 
612 } 613 614 if (ir->mode == ir_var_uniform) { 615 int param_index = c->prog_data.nr_params; 616 617 if (!strncmp(ir->name, "gl_", 3)) { 618 setup_builtin_uniform_values(ir); 619 } else { 620 setup_uniform_values(ir->location, ir->type); 621 } 622 623 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 624 } 625 626 if (!reg) 627 reg = new(this->mem_ctx) fs_reg(this, ir->type); 628 629 hash_table_insert(this->variable_ht, reg, ir); 630} 631 632void 633fs_visitor::visit(ir_dereference_variable *ir) 634{ 635 fs_reg *reg = variable_storage(ir->var); 636 this->result = *reg; 637} 638 639void 640fs_visitor::visit(ir_dereference_record *ir) 641{ 642 const glsl_type *struct_type = ir->record->type; 643 644 ir->record->accept(this); 645 646 unsigned int offset = 0; 647 for (unsigned int i = 0; i < struct_type->length; i++) { 648 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 649 break; 650 offset += type_size(struct_type->fields.structure[i].type); 651 } 652 this->result.reg_offset += offset; 653 this->result.type = brw_type_for_base_type(ir->type); 654} 655 656void 657fs_visitor::visit(ir_dereference_array *ir) 658{ 659 ir_constant *index; 660 int element_size; 661 662 ir->array->accept(this); 663 index = ir->array_index->as_constant(); 664 665 element_size = type_size(ir->type); 666 this->result.type = brw_type_for_base_type(ir->type); 667 668 if (index) { 669 assert(this->result.file == UNIFORM || 670 (this->result.file == GRF && 671 this->result.reg != 0)); 672 this->result.reg_offset += index->value.i[0] * element_size; 673 } else { 674 assert(!"FINISHME: non-constant array element"); 675 } 676} 677 678void 679fs_visitor::visit(ir_expression *ir) 680{ 681 unsigned int operand; 682 fs_reg op[2], temp; 683 fs_reg result; 684 fs_inst *inst; 685 686 for (operand = 0; operand < ir->get_num_operands(); operand++) { 687 ir->operands[operand]->accept(this); 688 if (this->result.file == BAD_FILE) { 689 ir_print_visitor v; 690 printf("Failed to get tree 
for expression operand:\n"); 691 ir->operands[operand]->accept(&v); 692 this->fail = true; 693 } 694 op[operand] = this->result; 695 696 /* Matrix expression operands should have been broken down to vector 697 * operations already. 698 */ 699 assert(!ir->operands[operand]->type->is_matrix()); 700 /* And then those vector operands should have been broken down to scalar. 701 */ 702 assert(!ir->operands[operand]->type->is_vector()); 703 } 704 705 /* Storage for our result. If our result goes into an assignment, it will 706 * just get copy-propagated out, so no worries. 707 */ 708 this->result = fs_reg(this, ir->type); 709 710 switch (ir->operation) { 711 case ir_unop_logic_not: 712 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 713 * ones complement of the whole register, not just bit 0. 714 */ 715 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 716 break; 717 case ir_unop_neg: 718 op[0].negate = !op[0].negate; 719 this->result = op[0]; 720 break; 721 case ir_unop_abs: 722 op[0].abs = true; 723 this->result = op[0]; 724 break; 725 case ir_unop_sign: 726 temp = fs_reg(this, ir->type); 727 728 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 729 730 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 731 inst->conditional_mod = BRW_CONDITIONAL_G; 732 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 733 inst->predicated = true; 734 735 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 736 inst->conditional_mod = BRW_CONDITIONAL_L; 737 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 738 inst->predicated = true; 739 740 break; 741 case ir_unop_rcp: 742 emit_math(FS_OPCODE_RCP, this->result, op[0]); 743 break; 744 745 case ir_unop_exp2: 746 emit_math(FS_OPCODE_EXP2, this->result, op[0]); 747 break; 748 case ir_unop_log2: 749 emit_math(FS_OPCODE_LOG2, this->result, op[0]); 750 break; 751 case ir_unop_exp: 752 case ir_unop_log: 753 assert(!"not reached: should 
be handled by ir_explog_to_explog2"); 754 break; 755 case ir_unop_sin: 756 emit_math(FS_OPCODE_SIN, this->result, op[0]); 757 break; 758 case ir_unop_cos: 759 emit_math(FS_OPCODE_COS, this->result, op[0]); 760 break; 761 762 case ir_unop_dFdx: 763 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 764 break; 765 case ir_unop_dFdy: 766 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 767 break; 768 769 case ir_binop_add: 770 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 771 break; 772 case ir_binop_sub: 773 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 774 break; 775 776 case ir_binop_mul: 777 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 778 break; 779 case ir_binop_div: 780 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 781 break; 782 case ir_binop_mod: 783 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 784 break; 785 786 case ir_binop_less: 787 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 788 inst->conditional_mod = BRW_CONDITIONAL_L; 789 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 790 break; 791 case ir_binop_greater: 792 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 793 inst->conditional_mod = BRW_CONDITIONAL_G; 794 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 795 break; 796 case ir_binop_lequal: 797 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 798 inst->conditional_mod = BRW_CONDITIONAL_LE; 799 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 800 break; 801 case ir_binop_gequal: 802 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 803 inst->conditional_mod = BRW_CONDITIONAL_GE; 804 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 805 break; 806 case ir_binop_equal: 807 case ir_binop_all_equal: /* same as nequal for scalars */ 808 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 809 
inst->conditional_mod = BRW_CONDITIONAL_Z; 810 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 811 break; 812 case ir_binop_nequal: 813 case ir_binop_any_nequal: /* same as nequal for scalars */ 814 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 815 inst->conditional_mod = BRW_CONDITIONAL_NZ; 816 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 817 break; 818 819 case ir_binop_logic_xor: 820 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 821 break; 822 823 case ir_binop_logic_or: 824 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 825 break; 826 827 case ir_binop_logic_and: 828 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 829 break; 830 831 case ir_binop_dot: 832 case ir_binop_cross: 833 case ir_unop_any: 834 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 835 break; 836 837 case ir_unop_noise: 838 assert(!"not reached: should be handled by lower_noise"); 839 break; 840 841 case ir_unop_sqrt: 842 emit_math(FS_OPCODE_SQRT, this->result, op[0]); 843 break; 844 845 case ir_unop_rsq: 846 emit_math(FS_OPCODE_RSQ, this->result, op[0]); 847 break; 848 849 case ir_unop_i2f: 850 case ir_unop_b2f: 851 case ir_unop_b2i: 852 case ir_unop_f2i: 853 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 854 break; 855 case ir_unop_f2b: 856 case ir_unop_i2b: 857 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 858 inst->conditional_mod = BRW_CONDITIONAL_NZ; 859 inst = emit(fs_inst(BRW_OPCODE_AND, this->result, 860 this->result, fs_reg(1))); 861 break; 862 863 case ir_unop_trunc: 864 emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0])); 865 break; 866 case ir_unop_ceil: 867 op[0].negate = !op[0].negate; 868 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 869 this->result.negate = true; 870 break; 871 case ir_unop_floor: 872 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 873 break; 874 case ir_unop_fract: 875 inst 
= emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 876 break; 877 case ir_unop_round_even: 878 emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0])); 879 break; 880 881 case ir_binop_min: 882 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 883 inst->conditional_mod = BRW_CONDITIONAL_L; 884 885 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 886 inst->predicated = true; 887 break; 888 case ir_binop_max: 889 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 890 inst->conditional_mod = BRW_CONDITIONAL_G; 891 892 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 893 inst->predicated = true; 894 break; 895 896 case ir_binop_pow: 897 emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); 898 break; 899 900 case ir_unop_bit_not: 901 case ir_unop_u2f: 902 case ir_binop_lshift: 903 case ir_binop_rshift: 904 case ir_binop_bit_and: 905 case ir_binop_bit_xor: 906 case ir_binop_bit_or: 907 assert(!"GLSL 1.30 features unsupported"); 908 break; 909 } 910} 911 912void 913fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 914 const glsl_type *type, bool predicated) 915{ 916 switch (type->base_type) { 917 case GLSL_TYPE_FLOAT: 918 case GLSL_TYPE_UINT: 919 case GLSL_TYPE_INT: 920 case GLSL_TYPE_BOOL: 921 for (unsigned int i = 0; i < type->components(); i++) { 922 l.type = brw_type_for_base_type(type); 923 r.type = brw_type_for_base_type(type); 924 925 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 926 inst->predicated = predicated; 927 928 l.reg_offset++; 929 r.reg_offset++; 930 } 931 break; 932 case GLSL_TYPE_ARRAY: 933 for (unsigned int i = 0; i < type->length; i++) { 934 emit_assignment_writes(l, r, type->fields.array, predicated); 935 } 936 937 case GLSL_TYPE_STRUCT: 938 for (unsigned int i = 0; i < type->length; i++) { 939 emit_assignment_writes(l, r, type->fields.structure[i].type, 940 predicated); 941 } 942 break; 943 944 case GLSL_TYPE_SAMPLER: 945 break; 946 947 default: 948 assert(!"not 
reached"); 949 break; 950 } 951} 952 953void 954fs_visitor::visit(ir_assignment *ir) 955{ 956 struct fs_reg l, r; 957 fs_inst *inst; 958 959 /* FINISHME: arrays on the lhs */ 960 ir->lhs->accept(this); 961 l = this->result; 962 963 ir->rhs->accept(this); 964 r = this->result; 965 966 assert(l.file != BAD_FILE); 967 assert(r.file != BAD_FILE); 968 969 if (ir->condition) { 970 emit_bool_to_cond_code(ir->condition); 971 } 972 973 if (ir->lhs->type->is_scalar() || 974 ir->lhs->type->is_vector()) { 975 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 976 if (ir->write_mask & (1 << i)) { 977 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 978 if (ir->condition) 979 inst->predicated = true; 980 r.reg_offset++; 981 } 982 l.reg_offset++; 983 } 984 } else { 985 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 986 } 987} 988 989fs_inst * 990fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 991{ 992 int mlen; 993 int base_mrf = 1; 994 bool simd16 = false; 995 fs_reg orig_dst; 996 997 /* g0 header. */ 998 mlen = 1; 999 1000 if (ir->shadow_comparitor) { 1001 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1002 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1003 coordinate)); 1004 coordinate.reg_offset++; 1005 } 1006 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1007 mlen += 3; 1008 1009 if (ir->op == ir_tex) { 1010 /* There's no plain shadow compare message, so we use shadow 1011 * compare with a bias of 0.0. 
1012 */ 1013 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1014 fs_reg(0.0f))); 1015 mlen++; 1016 } else if (ir->op == ir_txb) { 1017 ir->lod_info.bias->accept(this); 1018 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1019 this->result)); 1020 mlen++; 1021 } else { 1022 assert(ir->op == ir_txl); 1023 ir->lod_info.lod->accept(this); 1024 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1025 this->result)); 1026 mlen++; 1027 } 1028 1029 ir->shadow_comparitor->accept(this); 1030 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1031 mlen++; 1032 } else if (ir->op == ir_tex) { 1033 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1034 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1035 coordinate)); 1036 coordinate.reg_offset++; 1037 } 1038 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1039 mlen += 3; 1040 } else { 1041 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 1042 * instructions. We'll need to do SIMD16 here. 1043 */ 1044 assert(ir->op == ir_txb || ir->op == ir_txl); 1045 1046 for (int i = 0; i < ir->coordinate->type->vector_elements * 2;) { 1047 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), 1048 coordinate)); 1049 coordinate.reg_offset++; 1050 } 1051 1052 /* lod/bias appears after u/v/r. */ 1053 mlen += 6; 1054 1055 if (ir->op == ir_txb) { 1056 ir->lod_info.bias->accept(this); 1057 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1058 this->result)); 1059 mlen++; 1060 } else { 1061 ir->lod_info.lod->accept(this); 1062 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1063 this->result)); 1064 mlen++; 1065 } 1066 1067 /* The unused upper half. */ 1068 mlen++; 1069 1070 /* Now, since we're doing simd16, the return is 2 interleaved 1071 * vec4s where the odd-indexed ones are junk. We'll need to move 1072 * this weirdness around to the expected layout. 
1073 */ 1074 simd16 = true; 1075 orig_dst = dst; 1076 dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 1077 2)); 1078 dst.type = BRW_REGISTER_TYPE_F; 1079 } 1080 1081 fs_inst *inst = NULL; 1082 switch (ir->op) { 1083 case ir_tex: 1084 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1085 break; 1086 case ir_txb: 1087 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1088 break; 1089 case ir_txl: 1090 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1091 break; 1092 case ir_txd: 1093 case ir_txf: 1094 assert(!"GLSL 1.30 features unsupported"); 1095 break; 1096 } 1097 inst->base_mrf = base_mrf; 1098 inst->mlen = mlen; 1099 1100 if (simd16) { 1101 for (int i = 0; i < 4; i++) { 1102 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst)); 1103 orig_dst.reg_offset++; 1104 dst.reg_offset += 2; 1105 } 1106 } 1107 1108 return inst; 1109} 1110 1111fs_inst * 1112fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1113{ 1114 /* gen5's SIMD8 sampler has slots for u, v, r, array index, then 1115 * optional parameters like shadow comparitor or LOD bias. If 1116 * optional parameters aren't present, those base slots are 1117 * optional and don't need to be included in the message. 1118 * 1119 * We don't fill in the unnecessary slots regardless, which may 1120 * look surprising in the disassembly. 1121 */ 1122 int mlen = 1; /* g0 header always present. 
*/ 1123 int base_mrf = 1; 1124 1125 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1126 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), 1127 coordinate)); 1128 coordinate.reg_offset++; 1129 } 1130 mlen += ir->coordinate->type->vector_elements; 1131 1132 if (ir->shadow_comparitor) { 1133 mlen = MAX2(mlen, 5); 1134 1135 ir->shadow_comparitor->accept(this); 1136 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1137 mlen++; 1138 } 1139 1140 fs_inst *inst = NULL; 1141 switch (ir->op) { 1142 case ir_tex: 1143 inst = emit(fs_inst(FS_OPCODE_TEX, dst)); 1144 break; 1145 case ir_txb: 1146 ir->lod_info.bias->accept(this); 1147 mlen = MAX2(mlen, 5); 1148 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1149 mlen++; 1150 1151 inst = emit(fs_inst(FS_OPCODE_TXB, dst)); 1152 break; 1153 case ir_txl: 1154 ir->lod_info.lod->accept(this); 1155 mlen = MAX2(mlen, 5); 1156 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1157 mlen++; 1158 1159 inst = emit(fs_inst(FS_OPCODE_TXL, dst)); 1160 break; 1161 case ir_txd: 1162 case ir_txf: 1163 assert(!"GLSL 1.30 features unsupported"); 1164 break; 1165 } 1166 inst->base_mrf = base_mrf; 1167 inst->mlen = mlen; 1168 1169 return inst; 1170} 1171 1172void 1173fs_visitor::visit(ir_texture *ir) 1174{ 1175 int sampler; 1176 fs_inst *inst = NULL; 1177 1178 ir->coordinate->accept(this); 1179 fs_reg coordinate = this->result; 1180 1181 /* Should be lowered by do_lower_texture_projection */ 1182 assert(!ir->projector); 1183 1184 sampler = _mesa_get_sampler_uniform_value(ir->sampler, 1185 ctx->Shader.CurrentProgram, 1186 &brw->fragment_program->Base); 1187 sampler = c->fp->program.Base.SamplerUnits[sampler]; 1188 1189 /* The 965 requires the EU to do the normalization of GL rectangle 1190 * texture coordinates. We use the program parameter state 1191 * tracking to get the scaling factor. 
1192 */ 1193 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1194 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1195 int tokens[STATE_LENGTH] = { 1196 STATE_INTERNAL, 1197 STATE_TEXRECT_SCALE, 1198 sampler, 1199 0, 1200 0 1201 }; 1202 1203 fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1204 fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1205 GLuint index = _mesa_add_state_reference(params, 1206 (gl_state_index *)tokens); 1207 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 1208 1209 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0]; 1210 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1]; 1211 1212 fs_reg dst = fs_reg(this, ir->coordinate->type); 1213 fs_reg src = coordinate; 1214 coordinate = dst; 1215 1216 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x)); 1217 dst.reg_offset++; 1218 src.reg_offset++; 1219 emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y)); 1220 } 1221 1222 /* Writemasking doesn't eliminate channels on SIMD8 texture 1223 * samples, so don't worry about them. 
1224 */ 1225 fs_reg dst = fs_reg(this, glsl_type::vec4_type); 1226 1227 if (intel->gen < 5) { 1228 inst = emit_texture_gen4(ir, dst, coordinate); 1229 } else { 1230 inst = emit_texture_gen5(ir, dst, coordinate); 1231 } 1232 1233 inst->sampler = sampler; 1234 1235 this->result = dst; 1236 1237 if (ir->shadow_comparitor) 1238 inst->shadow_compare = true; 1239 1240 if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { 1241 fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); 1242 1243 for (int i = 0; i < 4; i++) { 1244 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1245 fs_reg l = swizzle_dst; 1246 l.reg_offset += i; 1247 1248 if (swiz == SWIZZLE_ZERO) { 1249 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f))); 1250 } else if (swiz == SWIZZLE_ONE) { 1251 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f))); 1252 } else { 1253 fs_reg r = dst; 1254 r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1255 emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1256 } 1257 } 1258 this->result = swizzle_dst; 1259 } 1260} 1261 1262void 1263fs_visitor::visit(ir_swizzle *ir) 1264{ 1265 ir->val->accept(this); 1266 fs_reg val = this->result; 1267 1268 if (ir->type->vector_elements == 1) { 1269 this->result.reg_offset += ir->mask.x; 1270 return; 1271 } 1272 1273 fs_reg result = fs_reg(this, ir->type); 1274 this->result = result; 1275 1276 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1277 fs_reg channel = val; 1278 int swiz = 0; 1279 1280 switch (i) { 1281 case 0: 1282 swiz = ir->mask.x; 1283 break; 1284 case 1: 1285 swiz = ir->mask.y; 1286 break; 1287 case 2: 1288 swiz = ir->mask.z; 1289 break; 1290 case 3: 1291 swiz = ir->mask.w; 1292 break; 1293 } 1294 1295 channel.reg_offset += swiz; 1296 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 1297 result.reg_offset++; 1298 } 1299} 1300 1301void 1302fs_visitor::visit(ir_discard *ir) 1303{ 1304 fs_reg temp = fs_reg(this, glsl_type::uint_type); 1305 1306 assert(ir->condition == NULL); /* FINISHME */ 1307 1308 
emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null)); 1309 emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null, temp)); 1310 kill_emitted = true; 1311} 1312 1313void 1314fs_visitor::visit(ir_constant *ir) 1315{ 1316 fs_reg reg(this, ir->type); 1317 this->result = reg; 1318 1319 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1320 switch (ir->type->base_type) { 1321 case GLSL_TYPE_FLOAT: 1322 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); 1323 break; 1324 case GLSL_TYPE_UINT: 1325 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); 1326 break; 1327 case GLSL_TYPE_INT: 1328 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); 1329 break; 1330 case GLSL_TYPE_BOOL: 1331 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); 1332 break; 1333 default: 1334 assert(!"Non-float/uint/int/bool constant"); 1335 } 1336 reg.reg_offset++; 1337 } 1338} 1339 1340void 1341fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 1342{ 1343 ir_expression *expr = ir->as_expression(); 1344 1345 if (expr) { 1346 fs_reg op[2]; 1347 fs_inst *inst; 1348 1349 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1350 assert(expr->operands[i]->type->is_scalar()); 1351 1352 expr->operands[i]->accept(this); 1353 op[i] = this->result; 1354 } 1355 1356 switch (expr->operation) { 1357 case ir_unop_logic_not: 1358 inst = emit(fs_inst(BRW_OPCODE_ADD, reg_null, op[0], fs_reg(-1))); 1359 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1360 break; 1361 1362 case ir_binop_logic_xor: 1363 inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null, op[0], op[1])); 1364 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1365 break; 1366 1367 case ir_binop_logic_or: 1368 inst = emit(fs_inst(BRW_OPCODE_OR, reg_null, op[0], op[1])); 1369 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1370 break; 1371 1372 case ir_binop_logic_and: 1373 inst = emit(fs_inst(BRW_OPCODE_AND, reg_null, op[0], op[1])); 1374 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1375 break; 1376 1377 case ir_unop_f2b: 1378 if 
(intel->gen >= 6) { 1379 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 1380 } else { 1381 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null, op[0])); 1382 } 1383 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1384 break; 1385 1386 case ir_unop_i2b: 1387 if (intel->gen >= 6) { 1388 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0))); 1389 } else { 1390 inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null, op[0])); 1391 } 1392 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1393 break; 1394 1395 case ir_binop_greater: 1396 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1])); 1397 inst->conditional_mod = BRW_CONDITIONAL_G; 1398 break; 1399 case ir_binop_gequal: 1400 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1])); 1401 inst->conditional_mod = BRW_CONDITIONAL_GE; 1402 break; 1403 case ir_binop_less: 1404 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1])); 1405 inst->conditional_mod = BRW_CONDITIONAL_L; 1406 break; 1407 case ir_binop_lequal: 1408 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1])); 1409 inst->conditional_mod = BRW_CONDITIONAL_LE; 1410 break; 1411 case ir_binop_equal: 1412 case ir_binop_all_equal: 1413 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1])); 1414 inst->conditional_mod = BRW_CONDITIONAL_Z; 1415 break; 1416 case ir_binop_nequal: 1417 case ir_binop_any_nequal: 1418 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1])); 1419 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1420 break; 1421 default: 1422 assert(!"not reached"); 1423 this->fail = true; 1424 break; 1425 } 1426 return; 1427 } 1428 1429 ir->accept(this); 1430 1431 if (intel->gen >= 6) { 1432 fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null, 1433 this->result, fs_reg(1))); 1434 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1435 } else { 1436 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null, this->result)); 1437 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1438 } 1439} 1440 1441void 1442fs_visitor::visit(ir_if 
*ir) 1443{ 1444 fs_inst *inst; 1445 1446 /* Don't point the annotation at the if statement, because then it plus 1447 * the then and else blocks get printed. 1448 */ 1449 this->base_ir = ir->condition; 1450 1451 emit_bool_to_cond_code(ir->condition); 1452 1453 inst = emit(fs_inst(BRW_OPCODE_IF)); 1454 inst->predicated = true; 1455 1456 foreach_iter(exec_list_iterator, iter, ir->then_instructions) { 1457 ir_instruction *ir = (ir_instruction *)iter.get(); 1458 this->base_ir = ir; 1459 1460 ir->accept(this); 1461 } 1462 1463 if (!ir->else_instructions.is_empty()) { 1464 emit(fs_inst(BRW_OPCODE_ELSE)); 1465 1466 foreach_iter(exec_list_iterator, iter, ir->else_instructions) { 1467 ir_instruction *ir = (ir_instruction *)iter.get(); 1468 this->base_ir = ir; 1469 1470 ir->accept(this); 1471 } 1472 } 1473 1474 emit(fs_inst(BRW_OPCODE_ENDIF)); 1475} 1476 1477void 1478fs_visitor::visit(ir_loop *ir) 1479{ 1480 fs_reg counter = reg_undef; 1481 1482 if (ir->counter) { 1483 this->base_ir = ir->counter; 1484 ir->counter->accept(this); 1485 counter = *(variable_storage(ir->counter)); 1486 1487 if (ir->from) { 1488 this->base_ir = ir->from; 1489 ir->from->accept(this); 1490 1491 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result)); 1492 } 1493 } 1494 1495 emit(fs_inst(BRW_OPCODE_DO)); 1496 1497 if (ir->to) { 1498 this->base_ir = ir->to; 1499 ir->to->accept(this); 1500 1501 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, 1502 counter, this->result)); 1503 switch (ir->cmp) { 1504 case ir_binop_equal: 1505 inst->conditional_mod = BRW_CONDITIONAL_Z; 1506 break; 1507 case ir_binop_nequal: 1508 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1509 break; 1510 case ir_binop_gequal: 1511 inst->conditional_mod = BRW_CONDITIONAL_GE; 1512 break; 1513 case ir_binop_lequal: 1514 inst->conditional_mod = BRW_CONDITIONAL_LE; 1515 break; 1516 case ir_binop_greater: 1517 inst->conditional_mod = BRW_CONDITIONAL_G; 1518 break; 1519 case ir_binop_less: 1520 inst->conditional_mod = BRW_CONDITIONAL_L; 
1521 break; 1522 default: 1523 assert(!"not reached: unknown loop condition"); 1524 this->fail = true; 1525 break; 1526 } 1527 1528 inst = emit(fs_inst(BRW_OPCODE_BREAK)); 1529 inst->predicated = true; 1530 } 1531 1532 foreach_iter(exec_list_iterator, iter, ir->body_instructions) { 1533 ir_instruction *ir = (ir_instruction *)iter.get(); 1534 1535 this->base_ir = ir; 1536 ir->accept(this); 1537 } 1538 1539 if (ir->increment) { 1540 this->base_ir = ir->increment; 1541 ir->increment->accept(this); 1542 emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result)); 1543 } 1544 1545 emit(fs_inst(BRW_OPCODE_WHILE)); 1546} 1547 1548void 1549fs_visitor::visit(ir_loop_jump *ir) 1550{ 1551 switch (ir->mode) { 1552 case ir_loop_jump::jump_break: 1553 emit(fs_inst(BRW_OPCODE_BREAK)); 1554 break; 1555 case ir_loop_jump::jump_continue: 1556 emit(fs_inst(BRW_OPCODE_CONTINUE)); 1557 break; 1558 } 1559} 1560 1561void 1562fs_visitor::visit(ir_call *ir) 1563{ 1564 assert(!"FINISHME"); 1565} 1566 1567void 1568fs_visitor::visit(ir_return *ir) 1569{ 1570 assert(!"FINISHME"); 1571} 1572 1573void 1574fs_visitor::visit(ir_function *ir) 1575{ 1576 /* Ignore function bodies other than main() -- we shouldn't see calls to 1577 * them since they should all be inlined before we get to ir_to_mesa. 
1578 */ 1579 if (strcmp(ir->name, "main") == 0) { 1580 const ir_function_signature *sig; 1581 exec_list empty; 1582 1583 sig = ir->matching_signature(&empty); 1584 1585 assert(sig); 1586 1587 foreach_iter(exec_list_iterator, iter, sig->body) { 1588 ir_instruction *ir = (ir_instruction *)iter.get(); 1589 this->base_ir = ir; 1590 1591 ir->accept(this); 1592 } 1593 } 1594} 1595 1596void 1597fs_visitor::visit(ir_function_signature *ir) 1598{ 1599 assert(!"not reached"); 1600 (void)ir; 1601} 1602 1603fs_inst * 1604fs_visitor::emit(fs_inst inst) 1605{ 1606 fs_inst *list_inst = new(mem_ctx) fs_inst; 1607 *list_inst = inst; 1608 1609 list_inst->annotation = this->current_annotation; 1610 list_inst->ir = this->base_ir; 1611 1612 this->instructions.push_tail(list_inst); 1613 1614 return list_inst; 1615} 1616 1617/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1618void 1619fs_visitor::emit_dummy_fs() 1620{ 1621 /* Everyone's favorite color. */ 1622 emit(fs_inst(BRW_OPCODE_MOV, 1623 fs_reg(MRF, 2), 1624 fs_reg(1.0f))); 1625 emit(fs_inst(BRW_OPCODE_MOV, 1626 fs_reg(MRF, 3), 1627 fs_reg(0.0f))); 1628 emit(fs_inst(BRW_OPCODE_MOV, 1629 fs_reg(MRF, 4), 1630 fs_reg(1.0f))); 1631 emit(fs_inst(BRW_OPCODE_MOV, 1632 fs_reg(MRF, 5), 1633 fs_reg(0.0f))); 1634 1635 fs_inst *write; 1636 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1637 fs_reg(0), 1638 fs_reg(0))); 1639 write->base_mrf = 0; 1640} 1641 1642/* The register location here is relative to the start of the URB 1643 * data. It will get adjusted to be a real location before 1644 * generate_code() time. 1645 */ 1646struct brw_reg 1647fs_visitor::interp_reg(int location, int channel) 1648{ 1649 int regnr = urb_setup[location] * 2 + channel / 2; 1650 int stride = (channel & 1) * 4; 1651 1652 assert(urb_setup[location] != -1); 1653 1654 return brw_vec1_grf(regnr, stride); 1655} 1656 1657/** Emits the interpolation for the varying inputs. 
*/
/* Pre-gen6 variant: builds the pixel centers from g1, derives the
 * deltas from vertex 0, then interpolates wpos.w and takes its
 * reciprocal.  Fills in pixel_x/y, delta_x/y, wpos_w and pixel_w.
 */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";

   /* Allocate both centers first (x, then y), then retype them to UW. */
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;

   fs_reg subspan_x = fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0));
   fs_reg x_offsets = fs_reg(brw_imm_v(0x10101010));
   emit(fs_inst(BRW_OPCODE_ADD, this->pixel_x, subspan_x, x_offsets));

   fs_reg subspan_y = fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0));
   fs_reg y_offsets = fs_reg(brw_imm_v(0x11001100));
   emit(fs_inst(BRW_OPCODE_ADD, this->pixel_y, subspan_y, y_offsets));

   this->current_annotation = "compute pixel deltas from v0";
   if (!brw->has_pln) {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   } else {
      /* With PLN, keep delta_x and delta_y adjacent in one vec2. */
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   }

   fs_reg neg_start_x = fs_reg(negate(brw_vec1_grf(1, 0)));
   emit(fs_inst(BRW_OPCODE_ADD, this->delta_x, this->pixel_x, neg_start_x));

   fs_reg neg_start_y = fs_reg(negate(brw_vec1_grf(1, 1)));
   emit(fs_inst(BRW_OPCODE_ADD, this->delta_y, this->pixel_y, neg_start_y));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));

   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);

   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs.
*/ 1709void 1710fs_visitor::emit_interpolation_setup_gen6() 1711{ 1712 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1713 1714 /* If the pixel centers end up used, the setup is the same as for gen4. */ 1715 this->current_annotation = "compute pixel centers"; 1716 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); 1717 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); 1718 int_pixel_x.type = BRW_REGISTER_TYPE_UW; 1719 int_pixel_y.type = BRW_REGISTER_TYPE_UW; 1720 emit(fs_inst(BRW_OPCODE_ADD, 1721 int_pixel_x, 1722 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1723 fs_reg(brw_imm_v(0x10101010)))); 1724 emit(fs_inst(BRW_OPCODE_ADD, 1725 int_pixel_y, 1726 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1727 fs_reg(brw_imm_v(0x11001100)))); 1728 1729 /* As of gen6, we can no longer mix float and int sources. We have 1730 * to turn the integer pixel centers into floats for their actual 1731 * use. 1732 */ 1733 this->pixel_x = fs_reg(this, glsl_type::float_type); 1734 this->pixel_y = fs_reg(this, glsl_type::float_type); 1735 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x)); 1736 emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); 1737 1738 this->current_annotation = "compute 1/pos.w"; 1739 this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); 1740 this->pixel_w = fs_reg(this, glsl_type::float_type); 1741 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); 1742 1743 this->delta_x = fs_reg(brw_vec8_grf(2, 0)); 1744 this->delta_y = fs_reg(brw_vec8_grf(3, 0)); 1745 1746 this->current_annotation = NULL; 1747} 1748 1749void 1750fs_visitor::emit_fb_writes() 1751{ 1752 this->current_annotation = "FB write header"; 1753 GLboolean header_present = GL_TRUE; 1754 int nr = 0; 1755 1756 if (intel->gen >= 6 && 1757 !this->kill_emitted && 1758 c->key.nr_color_regions == 1) { 1759 header_present = false; 1760 } 1761 1762 if (header_present) { 1763 /* m0, m1 header */ 1764 nr += 2; 1765 } 1766 1767 if (c->key.aa_dest_stencil_reg) { 
1768 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1769 fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); 1770 } 1771 1772 /* Reserve space for color. It'll be filled in per MRT below. */ 1773 int color_mrf = nr; 1774 nr += 4; 1775 1776 if (c->key.source_depth_to_render_target) { 1777 if (c->key.computes_depth) { 1778 /* Hand over gl_FragDepth. */ 1779 assert(this->frag_depth); 1780 fs_reg depth = *(variable_storage(this->frag_depth)); 1781 1782 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 1783 } else { 1784 /* Pass through the payload depth. */ 1785 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1786 fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); 1787 } 1788 } 1789 1790 if (c->key.dest_depth_reg) { 1791 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1792 fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); 1793 } 1794 1795 fs_reg color = reg_undef; 1796 if (this->frag_color) 1797 color = *(variable_storage(this->frag_color)); 1798 else if (this->frag_data) 1799 color = *(variable_storage(this->frag_data)); 1800 1801 for (int target = 0; target < c->key.nr_color_regions; target++) { 1802 this->current_annotation = talloc_asprintf(this->mem_ctx, 1803 "FB write target %d", 1804 target); 1805 if (this->frag_color || this->frag_data) { 1806 for (int i = 0; i < 4; i++) { 1807 emit(fs_inst(BRW_OPCODE_MOV, 1808 fs_reg(MRF, color_mrf + i), 1809 color)); 1810 color.reg_offset++; 1811 } 1812 } 1813 1814 if (this->frag_color) 1815 color.reg_offset -= 4; 1816 1817 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1818 reg_undef, reg_undef)); 1819 inst->target = target; 1820 inst->base_mrf = 0; 1821 inst->mlen = nr; 1822 if (target == c->key.nr_color_regions - 1) 1823 inst->eot = true; 1824 inst->header_present = header_present; 1825 } 1826 1827 if (c->key.nr_color_regions == 0) { 1828 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1829 reg_undef, reg_undef)); 1830 inst->base_mrf = 0; 1831 inst->mlen = nr; 1832 inst->eot = true; 1833 inst->header_present 
= header_present; 1834 } 1835 1836 this->current_annotation = NULL; 1837} 1838 1839void 1840fs_visitor::generate_fb_write(fs_inst *inst) 1841{ 1842 GLboolean eot = inst->eot; 1843 struct brw_reg implied_header; 1844 1845 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 1846 * move, here's g1. 1847 */ 1848 brw_push_insn_state(p); 1849 brw_set_mask_control(p, BRW_MASK_DISABLE); 1850 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1851 1852 if (inst->header_present) { 1853 if (intel->gen >= 6) { 1854 brw_MOV(p, 1855 brw_message_reg(inst->base_mrf), 1856 brw_vec8_grf(0, 0)); 1857 implied_header = brw_null_reg(); 1858 } else { 1859 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); 1860 } 1861 1862 brw_MOV(p, 1863 brw_message_reg(inst->base_mrf + 1), 1864 brw_vec8_grf(1, 0)); 1865 } else { 1866 implied_header = brw_null_reg(); 1867 } 1868 1869 brw_pop_insn_state(p); 1870 1871 brw_fb_WRITE(p, 1872 8, /* dispatch_width */ 1873 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 1874 inst->base_mrf, 1875 implied_header, 1876 inst->target, 1877 inst->mlen, 1878 0, 1879 eot); 1880} 1881 1882void 1883fs_visitor::generate_linterp(fs_inst *inst, 1884 struct brw_reg dst, struct brw_reg *src) 1885{ 1886 struct brw_reg delta_x = src[0]; 1887 struct brw_reg delta_y = src[1]; 1888 struct brw_reg interp = src[2]; 1889 1890 if (brw->has_pln && 1891 delta_y.nr == delta_x.nr + 1 && 1892 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { 1893 brw_PLN(p, dst, interp, delta_x); 1894 } else { 1895 brw_LINE(p, brw_null_reg(), interp, delta_x); 1896 brw_MAC(p, dst, suboffset(interp, 1), delta_y); 1897 } 1898} 1899 1900void 1901fs_visitor::generate_math(fs_inst *inst, 1902 struct brw_reg dst, struct brw_reg *src) 1903{ 1904 int op; 1905 1906 switch (inst->opcode) { 1907 case FS_OPCODE_RCP: 1908 op = BRW_MATH_FUNCTION_INV; 1909 break; 1910 case FS_OPCODE_RSQ: 1911 op = BRW_MATH_FUNCTION_RSQ; 1912 break; 1913 case FS_OPCODE_SQRT: 1914 op = 
BRW_MATH_FUNCTION_SQRT; 1915 break; 1916 case FS_OPCODE_EXP2: 1917 op = BRW_MATH_FUNCTION_EXP; 1918 break; 1919 case FS_OPCODE_LOG2: 1920 op = BRW_MATH_FUNCTION_LOG; 1921 break; 1922 case FS_OPCODE_POW: 1923 op = BRW_MATH_FUNCTION_POW; 1924 break; 1925 case FS_OPCODE_SIN: 1926 op = BRW_MATH_FUNCTION_SIN; 1927 break; 1928 case FS_OPCODE_COS: 1929 op = BRW_MATH_FUNCTION_COS; 1930 break; 1931 default: 1932 assert(!"not reached: unknown math function"); 1933 op = 0; 1934 break; 1935 } 1936 1937 if (intel->gen >= 6) { 1938 assert(inst->mlen == 0); 1939 1940 if (inst->opcode == FS_OPCODE_POW) { 1941 brw_math2(p, dst, op, src[0], src[1]); 1942 } else { 1943 brw_math(p, dst, 1944 op, 1945 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 1946 BRW_MATH_SATURATE_NONE, 1947 0, src[0], 1948 BRW_MATH_DATA_VECTOR, 1949 BRW_MATH_PRECISION_FULL); 1950 } 1951 } else { 1952 assert(inst->mlen >= 1); 1953 1954 brw_math(p, dst, 1955 op, 1956 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 1957 BRW_MATH_SATURATE_NONE, 1958 inst->base_mrf, src[0], 1959 BRW_MATH_DATA_VECTOR, 1960 BRW_MATH_PRECISION_FULL); 1961 } 1962} 1963 1964void 1965fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) 1966{ 1967 int msg_type = -1; 1968 int rlen = 4; 1969 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; 1970 1971 if (intel->gen >= 5) { 1972 switch (inst->opcode) { 1973 case FS_OPCODE_TEX: 1974 if (inst->shadow_compare) { 1975 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; 1976 } else { 1977 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; 1978 } 1979 break; 1980 case FS_OPCODE_TXB: 1981 if (inst->shadow_compare) { 1982 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; 1983 } else { 1984 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; 1985 } 1986 break; 1987 } 1988 } else { 1989 switch (inst->opcode) { 1990 case FS_OPCODE_TEX: 1991 /* Note that G45 and older determines shadow compare and dispatch width 1992 * from message length for most messages. 
1993 */ 1994 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 1995 if (inst->shadow_compare) { 1996 assert(inst->mlen == 5); 1997 } else { 1998 assert(inst->mlen <= 6); 1999 } 2000 break; 2001 case FS_OPCODE_TXB: 2002 if (inst->shadow_compare) { 2003 assert(inst->mlen == 5); 2004 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; 2005 } else { 2006 assert(inst->mlen == 8); 2007 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 2008 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; 2009 } 2010 break; 2011 } 2012 } 2013 assert(msg_type != -1); 2014 2015 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { 2016 rlen = 8; 2017 dst = vec16(dst); 2018 } 2019 2020 brw_SAMPLE(p, 2021 retype(dst, BRW_REGISTER_TYPE_UW), 2022 inst->base_mrf, 2023 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 2024 SURF_INDEX_TEXTURE(inst->sampler), 2025 inst->sampler, 2026 WRITEMASK_XYZW, 2027 msg_type, 2028 rlen, 2029 inst->mlen, 2030 0, 2031 1, 2032 simd_mode); 2033} 2034 2035 2036/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 2037 * looking like: 2038 * 2039 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 2040 * 2041 * and we're trying to produce: 2042 * 2043 * DDX DDY 2044 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 2045 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 2046 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 2047 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 2048 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 2049 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 2050 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 2051 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 2052 * 2053 * and add another set of two more subspans if in 16-pixel dispatch mode. 2054 * 2055 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 2056 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 2057 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 2058 * between each other. 
We could probably do it like ddx and swizzle the right 2059 * order later, but bail for now and just produce 2060 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) 2061 */ 2062void 2063fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2064{ 2065 struct brw_reg src0 = brw_reg(src.file, src.nr, 1, 2066 BRW_REGISTER_TYPE_F, 2067 BRW_VERTICAL_STRIDE_2, 2068 BRW_WIDTH_2, 2069 BRW_HORIZONTAL_STRIDE_0, 2070 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2071 struct brw_reg src1 = brw_reg(src.file, src.nr, 0, 2072 BRW_REGISTER_TYPE_F, 2073 BRW_VERTICAL_STRIDE_2, 2074 BRW_WIDTH_2, 2075 BRW_HORIZONTAL_STRIDE_0, 2076 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2077 brw_ADD(p, dst, src0, negate(src1)); 2078} 2079 2080void 2081fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 2082{ 2083 struct brw_reg src0 = brw_reg(src.file, src.nr, 0, 2084 BRW_REGISTER_TYPE_F, 2085 BRW_VERTICAL_STRIDE_4, 2086 BRW_WIDTH_4, 2087 BRW_HORIZONTAL_STRIDE_0, 2088 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2089 struct brw_reg src1 = brw_reg(src.file, src.nr, 2, 2090 BRW_REGISTER_TYPE_F, 2091 BRW_VERTICAL_STRIDE_4, 2092 BRW_WIDTH_4, 2093 BRW_HORIZONTAL_STRIDE_0, 2094 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 2095 brw_ADD(p, dst, src0, negate(src1)); 2096} 2097 2098void 2099fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask) 2100{ 2101 brw_push_insn_state(p); 2102 brw_set_mask_control(p, BRW_MASK_DISABLE); 2103 brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */ 2104 brw_pop_insn_state(p); 2105} 2106 2107void 2108fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask) 2109{ 2110 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); 2111 mask = brw_uw1_reg(mask.file, mask.nr, 0); 2112 2113 brw_push_insn_state(p); 2114 brw_set_mask_control(p, BRW_MASK_DISABLE); 2115 brw_AND(p, g0, mask, g0); 2116 brw_pop_insn_state(p); 2117} 2118 2119void 2120fs_visitor::assign_curb_setup() 2121{ 2122 c->prog_data.first_curbe_grf = c->key.nr_payload_regs; 2123 
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 2124 2125 /* Map the offsets in the UNIFORM file to fixed HW regs. */ 2126 foreach_iter(exec_list_iterator, iter, this->instructions) { 2127 fs_inst *inst = (fs_inst *)iter.get(); 2128 2129 for (unsigned int i = 0; i < 3; i++) { 2130 if (inst->src[i].file == UNIFORM) { 2131 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 2132 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + 2133 constant_nr / 8, 2134 constant_nr % 8); 2135 2136 inst->src[i].file = FIXED_HW_REG; 2137 inst->src[i].fixed_hw_reg = brw_reg; 2138 } 2139 } 2140 } 2141} 2142 2143void 2144fs_visitor::calculate_urb_setup() 2145{ 2146 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2147 urb_setup[i] = -1; 2148 } 2149 2150 int urb_next = 0; 2151 /* Figure out where each of the incoming setup attributes lands. */ 2152 if (intel->gen >= 6) { 2153 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 2154 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) { 2155 urb_setup[i] = urb_next++; 2156 } 2157 } 2158 } else { 2159 /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */ 2160 for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) { 2161 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { 2162 int fp_index; 2163 2164 if (i >= VERT_RESULT_VAR0) 2165 fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); 2166 else if (i <= VERT_RESULT_TEX7) 2167 fp_index = i; 2168 else 2169 fp_index = -1; 2170 2171 if (fp_index >= 0) 2172 urb_setup[fp_index] = urb_next++; 2173 } 2174 } 2175 } 2176 2177 /* Each attribute is 4 setup channels, each of which is half a reg. 
*/ 2178 c->prog_data.urb_read_length = urb_next * 2; 2179} 2180 2181void 2182fs_visitor::assign_urb_setup() 2183{ 2184 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; 2185 2186 /* Offset all the urb_setup[] index by the actual position of the 2187 * setup regs, now that the location of the constants has been chosen. 2188 */ 2189 foreach_iter(exec_list_iterator, iter, this->instructions) { 2190 fs_inst *inst = (fs_inst *)iter.get(); 2191 2192 if (inst->opcode != FS_OPCODE_LINTERP) 2193 continue; 2194 2195 assert(inst->src[2].file == FIXED_HW_REG); 2196 2197 inst->src[2].fixed_hw_reg.nr += urb_start; 2198 } 2199 2200 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 2201} 2202 2203static void 2204assign_reg(int *reg_hw_locations, fs_reg *reg) 2205{ 2206 if (reg->file == GRF && reg->reg != 0) { 2207 assert(reg->reg_offset >= 0); 2208 reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset; 2209 reg->reg = 0; 2210 } 2211} 2212 2213void 2214fs_visitor::assign_regs_trivial() 2215{ 2216 int last_grf = 0; 2217 int hw_reg_mapping[this->virtual_grf_next]; 2218 int i; 2219 2220 hw_reg_mapping[0] = 0; 2221 hw_reg_mapping[1] = this->first_non_payload_grf; 2222 for (i = 2; i < this->virtual_grf_next; i++) { 2223 hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + 2224 this->virtual_grf_sizes[i - 1]); 2225 } 2226 last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1]; 2227 2228 foreach_iter(exec_list_iterator, iter, this->instructions) { 2229 fs_inst *inst = (fs_inst *)iter.get(); 2230 2231 assign_reg(hw_reg_mapping, &inst->dst); 2232 assign_reg(hw_reg_mapping, &inst->src[0]); 2233 assign_reg(hw_reg_mapping, &inst->src[1]); 2234 } 2235 2236 this->grf_used = last_grf + 1; 2237} 2238 2239void 2240fs_visitor::assign_regs() 2241{ 2242 int last_grf = 0; 2243 int hw_reg_mapping[this->virtual_grf_next + 1]; 2244 int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf; 2245 int class_sizes[base_reg_count]; 2246 int 
class_count = 0; 2247 int aligned_pair_class = -1; 2248 2249 /* Set up the register classes. 2250 * 2251 * The base registers store a scalar value. For texture samples, 2252 * we get virtual GRFs composed of 4 contiguous hw register. For 2253 * structures and arrays, we store them as contiguous larger things 2254 * than that, though we should be able to do better most of the 2255 * time. 2256 */ 2257 class_sizes[class_count++] = 1; 2258 if (brw->has_pln && intel->gen < 6) { 2259 /* Always set up the (unaligned) pairs for gen5, so we can find 2260 * them for making the aligned pair class. 2261 */ 2262 class_sizes[class_count++] = 2; 2263 } 2264 for (int r = 1; r < this->virtual_grf_next; r++) { 2265 int i; 2266 2267 for (i = 0; i < class_count; i++) { 2268 if (class_sizes[i] == this->virtual_grf_sizes[r]) 2269 break; 2270 } 2271 if (i == class_count) { 2272 if (this->virtual_grf_sizes[r] >= base_reg_count) { 2273 fprintf(stderr, "Object too large to register allocate.\n"); 2274 this->fail = true; 2275 } 2276 2277 class_sizes[class_count++] = this->virtual_grf_sizes[r]; 2278 } 2279 } 2280 2281 int ra_reg_count = 0; 2282 int class_base_reg[class_count]; 2283 int class_reg_count[class_count]; 2284 int classes[class_count + 1]; 2285 2286 for (int i = 0; i < class_count; i++) { 2287 class_base_reg[i] = ra_reg_count; 2288 class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); 2289 ra_reg_count += class_reg_count[i]; 2290 } 2291 2292 struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); 2293 for (int i = 0; i < class_count; i++) { 2294 classes[i] = ra_alloc_reg_class(regs); 2295 2296 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { 2297 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); 2298 } 2299 2300 /* Add conflicts between our contiguous registers aliasing 2301 * base regs and other register classes' contiguous registers 2302 * that alias base regs, or the base regs themselves for classes[0]. 
2303 */ 2304 for (int c = 0; c <= i; c++) { 2305 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { 2306 for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); 2307 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]); 2308 c_r++) { 2309 2310 if (0) { 2311 printf("%d/%d conflicts %d/%d\n", 2312 class_sizes[i], this->first_non_payload_grf + i_r, 2313 class_sizes[c], this->first_non_payload_grf + c_r); 2314 } 2315 2316 ra_add_reg_conflict(regs, 2317 class_base_reg[i] + i_r, 2318 class_base_reg[c] + c_r); 2319 } 2320 } 2321 } 2322 } 2323 2324 /* Add a special class for aligned pairs, which we'll put delta_x/y 2325 * in on gen5 so that we can do PLN. 2326 */ 2327 if (brw->has_pln && intel->gen < 6) { 2328 int reg_count = (base_reg_count - 1) / 2; 2329 int unaligned_pair_class = 1; 2330 assert(class_sizes[unaligned_pair_class] == 2); 2331 2332 aligned_pair_class = class_count; 2333 classes[aligned_pair_class] = ra_alloc_reg_class(regs); 2334 class_sizes[aligned_pair_class] = 2; 2335 class_base_reg[aligned_pair_class] = 0; 2336 class_reg_count[aligned_pair_class] = 0; 2337 int start = (this->first_non_payload_grf & 1) ? 1 : 0; 2338 2339 for (int i = 0; i < reg_count; i++) { 2340 ra_class_add_reg(regs, classes[aligned_pair_class], 2341 class_base_reg[unaligned_pair_class] + i * 2 + start); 2342 } 2343 class_count++; 2344 } 2345 2346 ra_set_finalize(regs); 2347 2348 struct ra_graph *g = ra_alloc_interference_graph(regs, 2349 this->virtual_grf_next); 2350 /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 2351 * with nodes. 
2352 */ 2353 ra_set_node_class(g, 0, classes[0]); 2354 2355 for (int i = 1; i < this->virtual_grf_next; i++) { 2356 for (int c = 0; c < class_count; c++) { 2357 if (class_sizes[c] == this->virtual_grf_sizes[i]) { 2358 if (aligned_pair_class >= 0 && 2359 this->delta_x.reg == i) { 2360 ra_set_node_class(g, i, classes[aligned_pair_class]); 2361 } else { 2362 ra_set_node_class(g, i, classes[c]); 2363 } 2364 break; 2365 } 2366 } 2367 2368 for (int j = 1; j < i; j++) { 2369 if (virtual_grf_interferes(i, j)) { 2370 ra_add_node_interference(g, i, j); 2371 } 2372 } 2373 } 2374 2375 /* FINISHME: Handle spilling */ 2376 if (!ra_allocate_no_spills(g)) { 2377 fprintf(stderr, "Failed to allocate registers.\n"); 2378 this->fail = true; 2379 return; 2380 } 2381 2382 /* Get the chosen virtual registers for each node, and map virtual 2383 * regs in the register classes back down to real hardware reg 2384 * numbers. 2385 */ 2386 hw_reg_mapping[0] = 0; /* unused */ 2387 for (int i = 1; i < this->virtual_grf_next; i++) { 2388 int reg = ra_get_node_reg(g, i); 2389 int hw_reg = -1; 2390 2391 for (int c = 0; c < class_count; c++) { 2392 if (reg >= class_base_reg[c] && 2393 reg < class_base_reg[c] + class_reg_count[c]) { 2394 hw_reg = reg - class_base_reg[c]; 2395 break; 2396 } 2397 } 2398 2399 assert(hw_reg >= 0); 2400 hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg; 2401 last_grf = MAX2(last_grf, 2402 hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); 2403 } 2404 2405 foreach_iter(exec_list_iterator, iter, this->instructions) { 2406 fs_inst *inst = (fs_inst *)iter.get(); 2407 2408 assign_reg(hw_reg_mapping, &inst->dst); 2409 assign_reg(hw_reg_mapping, &inst->src[0]); 2410 assign_reg(hw_reg_mapping, &inst->src[1]); 2411 } 2412 2413 this->grf_used = last_grf + 1; 2414 2415 talloc_free(g); 2416 talloc_free(regs); 2417} 2418 2419/** 2420 * Split large virtual GRFs into separate components if we can. 
2421 * 2422 * This is mostly duplicated with what brw_fs_vector_splitting does, 2423 * but that's really conservative because it's afraid of doing 2424 * splitting that doesn't result in real progress after the rest of 2425 * the optimization phases, which would cause infinite looping in 2426 * optimization. We can do it once here, safely. This also has the 2427 * opportunity to split interpolated values, or maybe even uniforms, 2428 * which we don't have at the IR level. 2429 * 2430 * We want to split, because virtual GRFs are what we register 2431 * allocate and spill (due to contiguousness requirements for some 2432 * instructions), and they're what we naturally generate in the 2433 * codegen process, but most virtual GRFs don't actually need to be 2434 * contiguous sets of GRFs. If we split, we'll end up with reduced 2435 * live intervals and better dead code elimination and coalescing. 2436 */ 2437void 2438fs_visitor::split_virtual_grfs() 2439{ 2440 int num_vars = this->virtual_grf_next; 2441 bool split_grf[num_vars]; 2442 int new_virtual_grf[num_vars]; 2443 2444 /* Try to split anything > 0 sized. */ 2445 for (int i = 0; i < num_vars; i++) { 2446 if (this->virtual_grf_sizes[i] != 1) 2447 split_grf[i] = true; 2448 else 2449 split_grf[i] = false; 2450 } 2451 2452 if (brw->has_pln) { 2453 /* PLN opcodes rely on the delta_xy being contiguous. */ 2454 split_grf[this->delta_x.reg] = false; 2455 } 2456 2457 foreach_iter(exec_list_iterator, iter, this->instructions) { 2458 fs_inst *inst = (fs_inst *)iter.get(); 2459 2460 /* Texturing produces 4 contiguous registers, so no splitting. */ 2461 if ((inst->opcode == FS_OPCODE_TEX || 2462 inst->opcode == FS_OPCODE_TXB || 2463 inst->opcode == FS_OPCODE_TXL) && 2464 inst->dst.file == GRF) { 2465 split_grf[inst->dst.reg] = false; 2466 } 2467 } 2468 2469 /* Allocate new space for split regs. Note that the virtual 2470 * numbers will be contiguous. 
2471 */ 2472 for (int i = 0; i < num_vars; i++) { 2473 if (split_grf[i]) { 2474 new_virtual_grf[i] = virtual_grf_alloc(1); 2475 for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { 2476 int reg = virtual_grf_alloc(1); 2477 assert(reg == new_virtual_grf[i] + j - 1); 2478 } 2479 this->virtual_grf_sizes[i] = 1; 2480 } 2481 } 2482 2483 foreach_iter(exec_list_iterator, iter, this->instructions) { 2484 fs_inst *inst = (fs_inst *)iter.get(); 2485 2486 if (inst->dst.file == GRF && 2487 split_grf[inst->dst.reg] && 2488 inst->dst.reg_offset != 0) { 2489 inst->dst.reg = (new_virtual_grf[inst->dst.reg] + 2490 inst->dst.reg_offset - 1); 2491 inst->dst.reg_offset = 0; 2492 } 2493 for (int i = 0; i < 3; i++) { 2494 if (inst->src[i].file == GRF && 2495 split_grf[inst->src[i].reg] && 2496 inst->src[i].reg_offset != 0) { 2497 inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] + 2498 inst->src[i].reg_offset - 1); 2499 inst->src[i].reg_offset = 0; 2500 } 2501 } 2502 } 2503} 2504 2505void 2506fs_visitor::calculate_live_intervals() 2507{ 2508 int num_vars = this->virtual_grf_next; 2509 int *def = talloc_array(mem_ctx, int, num_vars); 2510 int *use = talloc_array(mem_ctx, int, num_vars); 2511 int loop_depth = 0; 2512 int loop_start = 0; 2513 2514 for (int i = 0; i < num_vars; i++) { 2515 def[i] = 1 << 30; 2516 use[i] = -1; 2517 } 2518 2519 int ip = 0; 2520 foreach_iter(exec_list_iterator, iter, this->instructions) { 2521 fs_inst *inst = (fs_inst *)iter.get(); 2522 2523 if (inst->opcode == BRW_OPCODE_DO) { 2524 if (loop_depth++ == 0) 2525 loop_start = ip; 2526 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2527 loop_depth--; 2528 2529 if (loop_depth == 0) { 2530 /* FINISHME: 2531 * 2532 * Patches up any vars marked for use within the loop as 2533 * live until the end. This is conservative, as there 2534 * will often be variables defined and used inside the 2535 * loop but dead at the end of the loop body. 
2536 */ 2537 for (int i = 0; i < num_vars; i++) { 2538 if (use[i] == loop_start) { 2539 use[i] = ip; 2540 } 2541 } 2542 } 2543 } else { 2544 int eip = ip; 2545 2546 if (loop_depth) 2547 eip = loop_start; 2548 2549 for (unsigned int i = 0; i < 3; i++) { 2550 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2551 use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip); 2552 } 2553 } 2554 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2555 def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip); 2556 } 2557 } 2558 2559 ip++; 2560 } 2561 2562 talloc_free(this->virtual_grf_def); 2563 talloc_free(this->virtual_grf_use); 2564 this->virtual_grf_def = def; 2565 this->virtual_grf_use = use; 2566} 2567 2568/** 2569 * Attempts to move immediate constants into the immediate 2570 * constant slot of following instructions. 2571 * 2572 * Immediate constants are a bit tricky -- they have to be in the last 2573 * operand slot, you can't do abs/negate on them, 2574 */ 2575 2576bool 2577fs_visitor::propagate_constants() 2578{ 2579 bool progress = false; 2580 2581 foreach_iter(exec_list_iterator, iter, this->instructions) { 2582 fs_inst *inst = (fs_inst *)iter.get(); 2583 2584 if (inst->opcode != BRW_OPCODE_MOV || 2585 inst->predicated || 2586 inst->dst.file != GRF || inst->src[0].file != IMM || 2587 inst->dst.type != inst->src[0].type) 2588 continue; 2589 2590 /* Don't bother with cases where we should have had the 2591 * operation on the constant folded in GLSL already. 2592 */ 2593 if (inst->saturate) 2594 continue; 2595 2596 /* Found a move of a constant to a GRF. Find anything else using the GRF 2597 * before it's written, and replace it with the constant if we can. 
2598 */ 2599 exec_list_iterator scan_iter = iter; 2600 scan_iter.next(); 2601 for (; scan_iter.has_next(); scan_iter.next()) { 2602 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2603 2604 if (scan_inst->opcode == BRW_OPCODE_DO || 2605 scan_inst->opcode == BRW_OPCODE_WHILE || 2606 scan_inst->opcode == BRW_OPCODE_ELSE || 2607 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2608 break; 2609 } 2610 2611 for (int i = 2; i >= 0; i--) { 2612 if (scan_inst->src[i].file != GRF || 2613 scan_inst->src[i].reg != inst->dst.reg || 2614 scan_inst->src[i].reg_offset != inst->dst.reg_offset) 2615 continue; 2616 2617 /* Don't bother with cases where we should have had the 2618 * operation on the constant folded in GLSL already. 2619 */ 2620 if (scan_inst->src[i].negate || scan_inst->src[i].abs) 2621 continue; 2622 2623 switch (scan_inst->opcode) { 2624 case BRW_OPCODE_MOV: 2625 scan_inst->src[i] = inst->src[0]; 2626 progress = true; 2627 break; 2628 2629 case BRW_OPCODE_MUL: 2630 case BRW_OPCODE_ADD: 2631 if (i == 1) { 2632 scan_inst->src[i] = inst->src[0]; 2633 progress = true; 2634 } else if (i == 0 && scan_inst->src[1].file != IMM) { 2635 /* Fit this constant in by commuting the operands */ 2636 scan_inst->src[0] = scan_inst->src[1]; 2637 scan_inst->src[1] = inst->src[0]; 2638 } 2639 break; 2640 case BRW_OPCODE_CMP: 2641 if (i == 1) { 2642 scan_inst->src[i] = inst->src[0]; 2643 progress = true; 2644 } 2645 } 2646 } 2647 2648 if (scan_inst->dst.file == GRF && 2649 scan_inst->dst.reg == inst->dst.reg && 2650 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2651 scan_inst->opcode == FS_OPCODE_TEX)) { 2652 break; 2653 } 2654 } 2655 } 2656 2657 return progress; 2658} 2659/** 2660 * Must be called after calculate_live_intervales() to remove unused 2661 * writes to registers -- register allocation will fail otherwise 2662 * because something deffed but not used won't be considered to 2663 * interfere with other regs. 
2664 */ 2665bool 2666fs_visitor::dead_code_eliminate() 2667{ 2668 bool progress = false; 2669 int num_vars = this->virtual_grf_next; 2670 bool dead[num_vars]; 2671 2672 for (int i = 0; i < num_vars; i++) { 2673 dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i]; 2674 2675 if (dead[i]) { 2676 /* Mark off its interval so it won't interfere with anything. */ 2677 this->virtual_grf_def[i] = -1; 2678 this->virtual_grf_use[i] = -1; 2679 } 2680 } 2681 2682 foreach_iter(exec_list_iterator, iter, this->instructions) { 2683 fs_inst *inst = (fs_inst *)iter.get(); 2684 2685 if (inst->dst.file == GRF && dead[inst->dst.reg]) { 2686 inst->remove(); 2687 progress = true; 2688 } 2689 } 2690 2691 return progress; 2692} 2693 2694bool 2695fs_visitor::register_coalesce() 2696{ 2697 bool progress = false; 2698 2699 foreach_iter(exec_list_iterator, iter, this->instructions) { 2700 fs_inst *inst = (fs_inst *)iter.get(); 2701 2702 if (inst->opcode != BRW_OPCODE_MOV || 2703 inst->predicated || 2704 inst->saturate || 2705 inst->dst.file != GRF || inst->src[0].file != GRF || 2706 inst->dst.type != inst->src[0].type) 2707 continue; 2708 2709 /* Found a move of a GRF to a GRF. Let's see if we can coalesce 2710 * them: check for no writes to either one until the exit of the 2711 * program. 
2712 */ 2713 bool interfered = false; 2714 exec_list_iterator scan_iter = iter; 2715 scan_iter.next(); 2716 for (; scan_iter.has_next(); scan_iter.next()) { 2717 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2718 2719 if (scan_inst->opcode == BRW_OPCODE_DO || 2720 scan_inst->opcode == BRW_OPCODE_WHILE || 2721 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2722 interfered = true; 2723 iter = scan_iter; 2724 break; 2725 } 2726 2727 if (scan_inst->dst.file == GRF) { 2728 if (scan_inst->dst.reg == inst->dst.reg && 2729 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2730 scan_inst->opcode == FS_OPCODE_TEX)) { 2731 interfered = true; 2732 break; 2733 } 2734 if (scan_inst->dst.reg == inst->src[0].reg && 2735 (scan_inst->dst.reg_offset == inst->src[0].reg_offset || 2736 scan_inst->opcode == FS_OPCODE_TEX)) { 2737 interfered = true; 2738 break; 2739 } 2740 } 2741 } 2742 if (interfered) { 2743 continue; 2744 } 2745 2746 /* Update live interval so we don't have to recalculate. */ 2747 this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg], 2748 virtual_grf_use[inst->dst.reg]); 2749 2750 /* Rewrite the later usage to point at the source of the move to 2751 * be removed. 
2752 */ 2753 for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); 2754 scan_iter.next()) { 2755 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2756 2757 for (int i = 0; i < 3; i++) { 2758 if (scan_inst->src[i].file == GRF && 2759 scan_inst->src[i].reg == inst->dst.reg && 2760 scan_inst->src[i].reg_offset == inst->dst.reg_offset) { 2761 scan_inst->src[i].reg = inst->src[0].reg; 2762 scan_inst->src[i].reg_offset = inst->src[0].reg_offset; 2763 scan_inst->src[i].abs |= inst->src[0].abs; 2764 scan_inst->src[i].negate ^= inst->src[0].negate; 2765 } 2766 } 2767 } 2768 2769 inst->remove(); 2770 progress = true; 2771 } 2772 2773 return progress; 2774} 2775 2776 2777bool 2778fs_visitor::compute_to_mrf() 2779{ 2780 bool progress = false; 2781 int next_ip = 0; 2782 2783 foreach_iter(exec_list_iterator, iter, this->instructions) { 2784 fs_inst *inst = (fs_inst *)iter.get(); 2785 2786 int ip = next_ip; 2787 next_ip++; 2788 2789 if (inst->opcode != BRW_OPCODE_MOV || 2790 inst->predicated || 2791 inst->dst.file != MRF || inst->src[0].file != GRF || 2792 inst->dst.type != inst->src[0].type || 2793 inst->src[0].abs || inst->src[0].negate) 2794 continue; 2795 2796 /* Can't compute-to-MRF this GRF if someone else was going to 2797 * read it later. 2798 */ 2799 if (this->virtual_grf_use[inst->src[0].reg] > ip) 2800 continue; 2801 2802 /* Found a move of a GRF to a MRF. Let's see if we can go 2803 * rewrite the thing that made this GRF to write into the MRF. 2804 */ 2805 bool found = false; 2806 fs_inst *scan_inst; 2807 for (scan_inst = (fs_inst *)inst->prev; 2808 scan_inst->prev != NULL; 2809 scan_inst = (fs_inst *)scan_inst->prev) { 2810 /* We don't handle flow control here. Most computation of 2811 * values that end up in MRFs are shortly before the MRF 2812 * write anyway. 
2813 */ 2814 if (scan_inst->opcode == BRW_OPCODE_DO || 2815 scan_inst->opcode == BRW_OPCODE_WHILE || 2816 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2817 break; 2818 } 2819 2820 /* You can't read from an MRF, so if someone else reads our 2821 * MRF's source GRF that we wanted to rewrite, that stops us. 2822 */ 2823 bool interfered = false; 2824 for (int i = 0; i < 3; i++) { 2825 if (scan_inst->src[i].file == GRF && 2826 scan_inst->src[i].reg == inst->src[0].reg && 2827 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) { 2828 interfered = true; 2829 } 2830 } 2831 if (interfered) 2832 break; 2833 2834 if (scan_inst->dst.file == MRF && 2835 scan_inst->dst.hw_reg == inst->dst.hw_reg) { 2836 /* Somebody else wrote our MRF here, so we can't can't 2837 * compute-to-MRF before that. 2838 */ 2839 break; 2840 } 2841 2842 if (scan_inst->mlen > 0) { 2843 /* Found a SEND instruction, which will do some amount of 2844 * implied write that may overwrite our MRF that we were 2845 * hoping to compute-to-MRF somewhere above it. Nothing 2846 * we have implied-writes more than 2 MRFs from base_mrf, 2847 * though. 2848 */ 2849 int implied_write_len = MIN2(scan_inst->mlen, 2); 2850 if (inst->dst.hw_reg >= scan_inst->base_mrf && 2851 inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) { 2852 break; 2853 } 2854 } 2855 2856 if (scan_inst->dst.file == GRF && 2857 scan_inst->dst.reg == inst->src[0].reg) { 2858 /* Found the last thing to write our reg we want to turn 2859 * into a compute-to-MRF. 2860 */ 2861 2862 if (scan_inst->opcode == FS_OPCODE_TEX) { 2863 /* texturing writes several continuous regs, so we can't 2864 * compute-to-mrf that. 2865 */ 2866 break; 2867 } 2868 2869 /* If it's predicated, it (probably) didn't populate all 2870 * the channels. 2871 */ 2872 if (scan_inst->predicated) 2873 break; 2874 2875 /* SEND instructions can't have MRF as a destination. 
*/ 2876 if (scan_inst->mlen) 2877 break; 2878 2879 if (intel->gen >= 6) { 2880 /* gen6 math instructions must have the destination be 2881 * GRF, so no compute-to-MRF for them. 2882 */ 2883 if (scan_inst->opcode == FS_OPCODE_RCP || 2884 scan_inst->opcode == FS_OPCODE_RSQ || 2885 scan_inst->opcode == FS_OPCODE_SQRT || 2886 scan_inst->opcode == FS_OPCODE_EXP2 || 2887 scan_inst->opcode == FS_OPCODE_LOG2 || 2888 scan_inst->opcode == FS_OPCODE_SIN || 2889 scan_inst->opcode == FS_OPCODE_COS || 2890 scan_inst->opcode == FS_OPCODE_POW) { 2891 break; 2892 } 2893 } 2894 2895 if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { 2896 /* Found the creator of our MRF's source value. */ 2897 found = true; 2898 break; 2899 } 2900 } 2901 } 2902 if (found) { 2903 scan_inst->dst.file = MRF; 2904 scan_inst->dst.hw_reg = inst->dst.hw_reg; 2905 scan_inst->saturate |= inst->saturate; 2906 inst->remove(); 2907 progress = true; 2908 } 2909 } 2910 2911 return progress; 2912} 2913 2914bool 2915fs_visitor::virtual_grf_interferes(int a, int b) 2916{ 2917 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); 2918 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); 2919 2920 /* For dead code, just check if the def interferes with the other range. 
*/ 2921 if (this->virtual_grf_use[a] == -1) { 2922 return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] && 2923 this->virtual_grf_def[a] < this->virtual_grf_use[b]); 2924 } 2925 if (this->virtual_grf_use[b] == -1) { 2926 return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] && 2927 this->virtual_grf_def[b] < this->virtual_grf_use[a]); 2928 } 2929 2930 return start < end; 2931} 2932 2933static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) 2934{ 2935 struct brw_reg brw_reg; 2936 2937 switch (reg->file) { 2938 case GRF: 2939 case ARF: 2940 case MRF: 2941 brw_reg = brw_vec8_reg(reg->file, 2942 reg->hw_reg, 0); 2943 brw_reg = retype(brw_reg, reg->type); 2944 break; 2945 case IMM: 2946 switch (reg->type) { 2947 case BRW_REGISTER_TYPE_F: 2948 brw_reg = brw_imm_f(reg->imm.f); 2949 break; 2950 case BRW_REGISTER_TYPE_D: 2951 brw_reg = brw_imm_d(reg->imm.i); 2952 break; 2953 case BRW_REGISTER_TYPE_UD: 2954 brw_reg = brw_imm_ud(reg->imm.u); 2955 break; 2956 default: 2957 assert(!"not reached"); 2958 break; 2959 } 2960 break; 2961 case FIXED_HW_REG: 2962 brw_reg = reg->fixed_hw_reg; 2963 break; 2964 case BAD_FILE: 2965 /* Probably unused. 
*/ 2966 brw_reg = brw_null_reg(); 2967 break; 2968 case UNIFORM: 2969 assert(!"not reached"); 2970 brw_reg = brw_null_reg(); 2971 break; 2972 } 2973 if (reg->abs) 2974 brw_reg = brw_abs(brw_reg); 2975 if (reg->negate) 2976 brw_reg = negate(brw_reg); 2977 2978 return brw_reg; 2979} 2980 2981void 2982fs_visitor::generate_code() 2983{ 2984 unsigned int annotation_len = 0; 2985 int last_native_inst = 0; 2986 struct brw_instruction *if_stack[16], *loop_stack[16]; 2987 int if_stack_depth = 0, loop_stack_depth = 0; 2988 int if_depth_in_loop[16]; 2989 2990 if_depth_in_loop[loop_stack_depth] = 0; 2991 2992 memset(&if_stack, 0, sizeof(if_stack)); 2993 foreach_iter(exec_list_iterator, iter, this->instructions) { 2994 fs_inst *inst = (fs_inst *)iter.get(); 2995 struct brw_reg src[3], dst; 2996 2997 for (unsigned int i = 0; i < 3; i++) { 2998 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 2999 } 3000 dst = brw_reg_from_fs_reg(&inst->dst); 3001 3002 brw_set_conditionalmod(p, inst->conditional_mod); 3003 brw_set_predicate_control(p, inst->predicated); 3004 3005 switch (inst->opcode) { 3006 case BRW_OPCODE_MOV: 3007 brw_MOV(p, dst, src[0]); 3008 break; 3009 case BRW_OPCODE_ADD: 3010 brw_ADD(p, dst, src[0], src[1]); 3011 break; 3012 case BRW_OPCODE_MUL: 3013 brw_MUL(p, dst, src[0], src[1]); 3014 break; 3015 3016 case BRW_OPCODE_FRC: 3017 brw_FRC(p, dst, src[0]); 3018 break; 3019 case BRW_OPCODE_RNDD: 3020 brw_RNDD(p, dst, src[0]); 3021 break; 3022 case BRW_OPCODE_RNDE: 3023 brw_RNDE(p, dst, src[0]); 3024 break; 3025 case BRW_OPCODE_RNDZ: 3026 brw_RNDZ(p, dst, src[0]); 3027 break; 3028 3029 case BRW_OPCODE_AND: 3030 brw_AND(p, dst, src[0], src[1]); 3031 break; 3032 case BRW_OPCODE_OR: 3033 brw_OR(p, dst, src[0], src[1]); 3034 break; 3035 case BRW_OPCODE_XOR: 3036 brw_XOR(p, dst, src[0], src[1]); 3037 break; 3038 case BRW_OPCODE_NOT: 3039 brw_NOT(p, dst, src[0]); 3040 break; 3041 case BRW_OPCODE_ASR: 3042 brw_ASR(p, dst, src[0], src[1]); 3043 break; 3044 case BRW_OPCODE_SHR: 3045 
brw_SHR(p, dst, src[0], src[1]); 3046 break; 3047 case BRW_OPCODE_SHL: 3048 brw_SHL(p, dst, src[0], src[1]); 3049 break; 3050 3051 case BRW_OPCODE_CMP: 3052 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 3053 break; 3054 case BRW_OPCODE_SEL: 3055 brw_SEL(p, dst, src[0], src[1]); 3056 break; 3057 3058 case BRW_OPCODE_IF: 3059 assert(if_stack_depth < 16); 3060 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 3061 if_depth_in_loop[loop_stack_depth]++; 3062 if_stack_depth++; 3063 break; 3064 case BRW_OPCODE_ELSE: 3065 if_stack[if_stack_depth - 1] = 3066 brw_ELSE(p, if_stack[if_stack_depth - 1]); 3067 break; 3068 case BRW_OPCODE_ENDIF: 3069 if_stack_depth--; 3070 brw_ENDIF(p , if_stack[if_stack_depth]); 3071 if_depth_in_loop[loop_stack_depth]--; 3072 break; 3073 3074 case BRW_OPCODE_DO: 3075 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 3076 if_depth_in_loop[loop_stack_depth] = 0; 3077 break; 3078 3079 case BRW_OPCODE_BREAK: 3080 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 3081 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 3082 break; 3083 case BRW_OPCODE_CONTINUE: 3084 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 3085 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 3086 break; 3087 3088 case BRW_OPCODE_WHILE: { 3089 struct brw_instruction *inst0, *inst1; 3090 GLuint br = 1; 3091 3092 if (intel->gen >= 5) 3093 br = 2; 3094 3095 assert(loop_stack_depth > 0); 3096 loop_stack_depth--; 3097 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 3098 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 3099 while (inst0 > loop_stack[loop_stack_depth]) { 3100 inst0--; 3101 if (inst0->header.opcode == BRW_OPCODE_BREAK && 3102 inst0->bits3.if_else.jump_count == 0) { 3103 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 3104 } 3105 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 3106 inst0->bits3.if_else.jump_count == 0) { 3107 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 3108 } 3109 } 3110 } 
3111 break; 3112 3113 case FS_OPCODE_RCP: 3114 case FS_OPCODE_RSQ: 3115 case FS_OPCODE_SQRT: 3116 case FS_OPCODE_EXP2: 3117 case FS_OPCODE_LOG2: 3118 case FS_OPCODE_POW: 3119 case FS_OPCODE_SIN: 3120 case FS_OPCODE_COS: 3121 generate_math(inst, dst, src); 3122 break; 3123 case FS_OPCODE_LINTERP: 3124 generate_linterp(inst, dst, src); 3125 break; 3126 case FS_OPCODE_TEX: 3127 case FS_OPCODE_TXB: 3128 case FS_OPCODE_TXL: 3129 generate_tex(inst, dst); 3130 break; 3131 case FS_OPCODE_DISCARD_NOT: 3132 generate_discard_not(inst, dst); 3133 break; 3134 case FS_OPCODE_DISCARD_AND: 3135 generate_discard_and(inst, src[0]); 3136 break; 3137 case FS_OPCODE_DDX: 3138 generate_ddx(inst, dst, src[0]); 3139 break; 3140 case FS_OPCODE_DDY: 3141 generate_ddy(inst, dst, src[0]); 3142 break; 3143 case FS_OPCODE_FB_WRITE: 3144 generate_fb_write(inst); 3145 break; 3146 default: 3147 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 3148 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 3149 brw_opcodes[inst->opcode].name); 3150 } else { 3151 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 3152 } 3153 this->fail = true; 3154 } 3155 3156 if (annotation_len < p->nr_insn) { 3157 annotation_len *= 2; 3158 if (annotation_len < 16) 3159 annotation_len = 16; 3160 3161 this->annotation_string = talloc_realloc(this->mem_ctx, 3162 annotation_string, 3163 const char *, 3164 annotation_len); 3165 this->annotation_ir = talloc_realloc(this->mem_ctx, 3166 annotation_ir, 3167 ir_instruction *, 3168 annotation_len); 3169 } 3170 3171 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 3172 this->annotation_string[i] = inst->annotation; 3173 this->annotation_ir[i] = inst->ir; 3174 } 3175 last_native_inst = p->nr_insn; 3176 } 3177} 3178 3179GLboolean 3180brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 3181{ 3182 struct brw_compile *p = &c->func; 3183 struct intel_context *intel = &brw->intel; 3184 struct gl_context *ctx = &intel->ctx; 3185 struct brw_shader 
*shader = NULL; 3186 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 3187 3188 if (!prog) 3189 return GL_FALSE; 3190 3191 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 3192 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 3193 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 3194 break; 3195 } 3196 } 3197 if (!shader) 3198 return GL_FALSE; 3199 3200 /* We always use 8-wide mode, at least for now. For one, flow 3201 * control only works in 8-wide. Also, when we're fragment shader 3202 * bound, we're almost always under register pressure as well, so 3203 * 8-wide would save us from the performance cliff of spilling 3204 * regs. 3205 */ 3206 c->dispatch_width = 8; 3207 3208 if (INTEL_DEBUG & DEBUG_WM) { 3209 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 3210 _mesa_print_ir(shader->ir, NULL); 3211 printf("\n"); 3212 } 3213 3214 /* Now the main event: Visit the shader IR and generate our FS IR for it. 3215 */ 3216 fs_visitor v(c, shader); 3217 3218 if (0) { 3219 v.emit_dummy_fs(); 3220 } else { 3221 v.calculate_urb_setup(); 3222 if (intel->gen < 6) 3223 v.emit_interpolation_setup_gen4(); 3224 else 3225 v.emit_interpolation_setup_gen6(); 3226 3227 /* Generate FS IR for main(). (the visitor only descends into 3228 * functions called "main"). 
3229 */ 3230 foreach_iter(exec_list_iterator, iter, *shader->ir) { 3231 ir_instruction *ir = (ir_instruction *)iter.get(); 3232 v.base_ir = ir; 3233 ir->accept(&v); 3234 } 3235 3236 v.emit_fb_writes(); 3237 3238 v.split_virtual_grfs(); 3239 3240 v.assign_curb_setup(); 3241 v.assign_urb_setup(); 3242 3243 bool progress; 3244 do { 3245 progress = false; 3246 v.calculate_live_intervals(); 3247 progress = v.propagate_constants() || progress; 3248 progress = v.register_coalesce() || progress; 3249 progress = v.compute_to_mrf() || progress; 3250 progress = v.dead_code_eliminate() || progress; 3251 } while (progress); 3252 3253 if (0) 3254 v.assign_regs_trivial(); 3255 else 3256 v.assign_regs(); 3257 } 3258 3259 if (!v.fail) 3260 v.generate_code(); 3261 3262 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */ 3263 3264 if (v.fail) 3265 return GL_FALSE; 3266 3267 if (INTEL_DEBUG & DEBUG_WM) { 3268 const char *last_annotation_string = NULL; 3269 ir_instruction *last_annotation_ir = NULL; 3270 3271 printf("Native code for fragment shader %d:\n", prog->Name); 3272 for (unsigned int i = 0; i < p->nr_insn; i++) { 3273 if (last_annotation_ir != v.annotation_ir[i]) { 3274 last_annotation_ir = v.annotation_ir[i]; 3275 if (last_annotation_ir) { 3276 printf(" "); 3277 last_annotation_ir->print(); 3278 printf("\n"); 3279 } 3280 } 3281 if (last_annotation_string != v.annotation_string[i]) { 3282 last_annotation_string = v.annotation_string[i]; 3283 if (last_annotation_string) 3284 printf(" %s\n", last_annotation_string); 3285 } 3286 brw_disasm(stdout, &p->store[i], intel->gen); 3287 } 3288 printf("\n"); 3289 } 3290 3291 c->prog_data.total_grf = v.grf_used; 3292 c->prog_data.total_scratch = 0; 3293 3294 return GL_TRUE; 3295} 3296