brw_fs_visitor.cpp revision c0f60106df724188d6ffe7c9f21eeff22186ab25
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file brw_fs_visitor.cpp 25 * 26 * This file supports generating the FS LIR from the GLSL IR. The LIR 27 * makes it easier to do backend-specific optimizations than doing so 28 * in the GLSL IR or in the native code. 29 */ 30extern "C" { 31 32#include <sys/types.h> 33 34#include "main/macros.h" 35#include "main/shaderobj.h" 36#include "main/uniforms.h" 37#include "program/prog_parameter.h" 38#include "program/prog_print.h" 39#include "program/prog_optimize.h" 40#include "program/register_allocate.h" 41#include "program/sampler.h" 42#include "program/hash_table.h" 43#include "brw_context.h" 44#include "brw_eu.h" 45#include "brw_wm.h" 46} 47#include "brw_shader.h" 48#include "brw_fs.h" 49#include "glsl/glsl_types.h" 50#include "glsl/ir_optimization.h" 51#include "glsl/ir_print_visitor.h" 52 53void 54fs_visitor::visit(ir_variable *ir) 55{ 56 fs_reg *reg = NULL; 57 58 if (variable_storage(ir)) 59 return; 60 61 if (ir->mode == ir_var_in) { 62 if (!strcmp(ir->name, "gl_FragCoord")) { 63 reg = emit_fragcoord_interpolation(ir); 64 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 65 reg = emit_frontfacing_interpolation(ir); 66 } else { 67 reg = emit_general_interpolation(ir); 68 } 69 assert(reg); 70 hash_table_insert(this->variable_ht, reg, ir); 71 return; 72 } else if (ir->mode == ir_var_out) { 73 reg = new(this->mem_ctx) fs_reg(this, ir->type); 74 75 if (ir->index > 0) { 76 assert(ir->location == FRAG_RESULT_DATA0); 77 assert(ir->index == 1); 78 this->dual_src_output = *reg; 79 } else if (ir->location == FRAG_RESULT_COLOR) { 80 /* Writing gl_FragColor outputs to all color regions. */ 81 for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { 82 this->outputs[i] = *reg; 83 this->output_components[i] = 4; 84 } 85 } else if (ir->location == FRAG_RESULT_DEPTH) { 86 this->frag_depth = ir; 87 } else { 88 /* gl_FragData or a user-defined FS output */ 89 assert(ir->location >= FRAG_RESULT_DATA0 && 90 ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); 91 92 int vector_elements = 93 ir->type->is_array() ? ir->type->fields.array->vector_elements 94 : ir->type->vector_elements; 95 96 /* General color output. */ 97 for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) { 98 int output = ir->location - FRAG_RESULT_DATA0 + i; 99 this->outputs[output] = *reg; 100 this->outputs[output].reg_offset += vector_elements * i; 101 this->output_components[output] = vector_elements; 102 } 103 } 104 } else if (ir->mode == ir_var_uniform) { 105 int param_index = c->prog_data.nr_params; 106 107 if (c->dispatch_width == 16) { 108 if (!variable_storage(ir)) { 109 fail("Failed to find uniform '%s' in 16-wide\n", ir->name); 110 } 111 return; 112 } 113 114 if (!strncmp(ir->name, "gl_", 3)) { 115 setup_builtin_uniform_values(ir); 116 } else { 117 setup_uniform_values(ir->location, ir->type); 118 } 119 120 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 121 reg->type = brw_type_for_base_type(ir->type); 122 } 123 124 if (!reg) 125 reg = new(this->mem_ctx) fs_reg(this, ir->type); 126 127 hash_table_insert(this->variable_ht, reg, ir); 128} 129 130void 131fs_visitor::visit(ir_dereference_variable *ir) 132{ 133 fs_reg *reg = variable_storage(ir->var); 134 this->result = *reg; 135} 136 137void 138fs_visitor::visit(ir_dereference_record *ir) 139{ 140 const glsl_type *struct_type = ir->record->type; 141 142 ir->record->accept(this); 143 144 unsigned int offset = 0; 145 for (unsigned int i = 0; i < struct_type->length; i++) { 146 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 147 break; 148 offset += type_size(struct_type->fields.structure[i].type); 149 } 150 this->result.reg_offset += offset; 151 this->result.type = brw_type_for_base_type(ir->type); 152} 153 154void 155fs_visitor::visit(ir_dereference_array *ir) 156{ 157 ir_constant *index; 158 int element_size; 159 160 ir->array->accept(this); 161 index = ir->array_index->as_constant(); 162 163 element_size = type_size(ir->type); 164 this->result.type = brw_type_for_base_type(ir->type); 165 166 if (index) { 167 assert(this->result.file == UNIFORM || this->result.file == GRF); 168 this->result.reg_offset += index->value.i[0] * element_size; 169 } else { 170 assert(!"FINISHME: non-constant array element"); 171 } 172} 173 174/* Instruction selection: Produce a MOV.sat instead of 175 * MIN(MAX(val, 0), 1) when possible. 176 */ 177bool 178fs_visitor::try_emit_saturate(ir_expression *ir) 179{ 180 ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); 181 182 if (!sat_val) 183 return false; 184 185 fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail(); 186 187 sat_val->accept(this); 188 fs_reg src = this->result; 189 190 fs_inst *last_inst = (fs_inst *) this->instructions.get_tail(); 191 192 /* If the last instruction from our accept() didn't generate our 193 * src, generate a saturated MOV 194 */ 195 fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); 196 if (!modify || modify->regs_written() != 1) { 197 fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src); 198 inst->saturate = true; 199 } else { 200 modify->saturate = true; 201 this->result = src; 202 } 203 204 205 return true; 206} 207 208bool 209fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) 210{ 211 /* 3-src instructions were introduced in gen6. */ 212 if (intel->gen < 6) 213 return false; 214 215 /* MAD can only handle floating-point data. */ 216 if (ir->type != glsl_type::float_type) 217 return false; 218 219 ir_rvalue *nonmul = ir->operands[1 - mul_arg]; 220 ir_expression *mul = ir->operands[mul_arg]->as_expression(); 221 222 if (!mul || mul->operation != ir_binop_mul) 223 return false; 224 225 if (nonmul->as_constant() || 226 mul->operands[0]->as_constant() || 227 mul->operands[1]->as_constant()) 228 return false; 229 230 nonmul->accept(this); 231 fs_reg src0 = this->result; 232 233 mul->operands[0]->accept(this); 234 fs_reg src1 = this->result; 235 236 mul->operands[1]->accept(this); 237 fs_reg src2 = this->result; 238 239 this->result = fs_reg(this, ir->type); 240 emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); 241 242 return true; 243} 244 245void 246fs_visitor::visit(ir_expression *ir) 247{ 248 unsigned int operand; 249 fs_reg op[2], temp; 250 fs_inst *inst; 251 252 assert(ir->get_num_operands() <= 2); 253 254 if (try_emit_saturate(ir)) 255 return; 256 if (ir->operation == ir_binop_add) { 257 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1)) 258 return; 259 } 260 261 for (operand = 0; operand < ir->get_num_operands(); operand++) { 262 ir->operands[operand]->accept(this); 263 if (this->result.file == BAD_FILE) { 264 ir_print_visitor v; 265 fail("Failed to get tree for expression operand:\n"); 266 ir->operands[operand]->accept(&v); 267 } 268 op[operand] = this->result; 269 270 /* Matrix expression operands should have been broken down to vector 271 * operations already. 272 */ 273 assert(!ir->operands[operand]->type->is_matrix()); 274 /* And then those vector operands should have been broken down to scalar. 275 */ 276 assert(!ir->operands[operand]->type->is_vector()); 277 } 278 279 /* Storage for our result. If our result goes into an assignment, it will 280 * just get copy-propagated out, so no worries. 281 */ 282 this->result = fs_reg(this, ir->type); 283 284 switch (ir->operation) { 285 case ir_unop_logic_not: 286 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 287 * ones complement of the whole register, not just bit 0. 288 */ 289 emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)); 290 break; 291 case ir_unop_neg: 292 op[0].negate = !op[0].negate; 293 this->result = op[0]; 294 break; 295 case ir_unop_abs: 296 op[0].abs = true; 297 op[0].negate = false; 298 this->result = op[0]; 299 break; 300 case ir_unop_sign: 301 temp = fs_reg(this, ir->type); 302 303 emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)); 304 305 inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); 306 inst->conditional_mod = BRW_CONDITIONAL_G; 307 inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)); 308 inst->predicated = true; 309 310 inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); 311 inst->conditional_mod = BRW_CONDITIONAL_L; 312 inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)); 313 inst->predicated = true; 314 315 break; 316 case ir_unop_rcp: 317 emit_math(SHADER_OPCODE_RCP, this->result, op[0]); 318 break; 319 320 case ir_unop_exp2: 321 emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); 322 break; 323 case ir_unop_log2: 324 emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); 325 break; 326 case ir_unop_exp: 327 case ir_unop_log: 328 assert(!"not reached: should be handled by ir_explog_to_explog2"); 329 break; 330 case ir_unop_sin: 331 case ir_unop_sin_reduced: 332 emit_math(SHADER_OPCODE_SIN, this->result, op[0]); 333 break; 334 case ir_unop_cos: 335 case ir_unop_cos_reduced: 336 emit_math(SHADER_OPCODE_COS, this->result, op[0]); 337 break; 338 339 case ir_unop_dFdx: 340 emit(FS_OPCODE_DDX, this->result, op[0]); 341 break; 342 case ir_unop_dFdy: 343 emit(FS_OPCODE_DDY, this->result, op[0]); 344 break; 345 346 case ir_binop_add: 347 emit(BRW_OPCODE_ADD, this->result, op[0], op[1]); 348 break; 349 case ir_binop_sub: 350 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 351 break; 352 353 case ir_binop_mul: 354 if (ir->type->is_integer()) { 355 /* For integer multiplication, the MUL uses the low 16 bits 356 * of one of the operands (src0 on gen6, src1 on gen7). The 357 * MACH accumulates in the contribution of the upper 16 bits 358 * of that operand. 359 * 360 * FINISHME: Emit just the MUL if we know an operand is small 361 * enough. 362 */ 363 if (intel->gen >= 7 && c->dispatch_width == 16) 364 fail("16-wide explicit accumulator operands unsupported\n"); 365 366 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); 367 368 emit(BRW_OPCODE_MUL, acc, op[0], op[1]); 369 emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]); 370 emit(BRW_OPCODE_MOV, this->result, fs_reg(acc)); 371 } else { 372 emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); 373 } 374 break; 375 case ir_binop_div: 376 if (intel->gen >= 7 && c->dispatch_width == 16) 377 fail("16-wide INTDIV unsupported\n"); 378 379 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ 380 assert(ir->type->is_integer()); 381 emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); 382 break; 383 case ir_binop_mod: 384 if (intel->gen >= 7 && c->dispatch_width == 16) 385 fail("16-wide INTDIV unsupported\n"); 386 387 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ 388 assert(ir->type->is_integer()); 389 emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); 390 break; 391 392 case ir_binop_less: 393 case ir_binop_greater: 394 case ir_binop_lequal: 395 case ir_binop_gequal: 396 case ir_binop_equal: 397 case ir_binop_all_equal: 398 case ir_binop_nequal: 399 case ir_binop_any_nequal: 400 temp = this->result; 401 /* original gen4 does implicit conversion before comparison. */ 402 if (intel->gen < 5) 403 temp.type = op[0].type; 404 405 resolve_ud_negate(&op[0]); 406 resolve_ud_negate(&op[1]); 407 408 resolve_bool_comparison(ir->operands[0], &op[0]); 409 resolve_bool_comparison(ir->operands[1], &op[1]); 410 411 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 412 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 413 break; 414 415 case ir_binop_logic_xor: 416 emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); 417 break; 418 419 case ir_binop_logic_or: 420 emit(BRW_OPCODE_OR, this->result, op[0], op[1]); 421 break; 422 423 case ir_binop_logic_and: 424 emit(BRW_OPCODE_AND, this->result, op[0], op[1]); 425 break; 426 427 case ir_binop_dot: 428 case ir_unop_any: 429 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 430 break; 431 432 case ir_unop_noise: 433 assert(!"not reached: should be handled by lower_noise"); 434 break; 435 436 case ir_quadop_vector: 437 assert(!"not reached: should be handled by lower_quadop_vector"); 438 break; 439 440 case ir_unop_sqrt: 441 emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); 442 break; 443 444 case ir_unop_rsq: 445 emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); 446 break; 447 448 case ir_unop_bitcast_i2f: 449 case ir_unop_bitcast_u2f: 450 op[0].type = BRW_REGISTER_TYPE_F; 451 this->result = op[0]; 452 break; 453 case ir_unop_i2u: 454 case ir_unop_bitcast_f2u: 455 op[0].type = BRW_REGISTER_TYPE_UD; 456 this->result = op[0]; 457 break; 458 case ir_unop_u2i: 459 case ir_unop_bitcast_f2i: 460 op[0].type = BRW_REGISTER_TYPE_D; 461 this->result = op[0]; 462 break; 463 case ir_unop_i2f: 464 case ir_unop_u2f: 465 case ir_unop_f2i: 466 case ir_unop_f2u: 467 emit(BRW_OPCODE_MOV, this->result, op[0]); 468 break; 469 470 case ir_unop_b2i: 471 inst = emit(BRW_OPCODE_AND, this->result, op[0], fs_reg(1)); 472 break; 473 case ir_unop_b2f: 474 temp = fs_reg(this, glsl_type::int_type); 475 emit(BRW_OPCODE_AND, temp, op[0], fs_reg(1)); 476 emit(BRW_OPCODE_MOV, this->result, temp); 477 break; 478 479 case ir_unop_f2b: 480 inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)); 481 inst->conditional_mod = BRW_CONDITIONAL_NZ; 482 emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); 483 break; 484 case ir_unop_i2b: 485 assert(op[0].type == BRW_REGISTER_TYPE_D); 486 487 inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0)); 488 inst->conditional_mod = BRW_CONDITIONAL_NZ; 489 emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); 490 break; 491 492 case ir_unop_trunc: 493 emit(BRW_OPCODE_RNDZ, this->result, op[0]); 494 break; 495 case ir_unop_ceil: 496 op[0].negate = !op[0].negate; 497 inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); 498 this->result.negate = true; 499 break; 500 case ir_unop_floor: 501 inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); 502 break; 503 case ir_unop_fract: 504 inst = emit(BRW_OPCODE_FRC, this->result, op[0]); 505 break; 506 case ir_unop_round_even: 507 emit(BRW_OPCODE_RNDE, this->result, op[0]); 508 break; 509 510 case ir_binop_min: 511 resolve_ud_negate(&op[0]); 512 resolve_ud_negate(&op[1]); 513 514 if (intel->gen >= 6) { 515 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 516 inst->conditional_mod = BRW_CONDITIONAL_L; 517 } else { 518 /* Unalias the destination */ 519 this->result = fs_reg(this, ir->type); 520 521 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); 522 inst->conditional_mod = BRW_CONDITIONAL_L; 523 524 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 525 inst->predicated = true; 526 } 527 break; 528 case ir_binop_max: 529 resolve_ud_negate(&op[0]); 530 resolve_ud_negate(&op[1]); 531 532 if (intel->gen >= 6) { 533 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 534 inst->conditional_mod = BRW_CONDITIONAL_GE; 535 } else { 536 /* Unalias the destination */ 537 this->result = fs_reg(this, ir->type); 538 539 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); 540 inst->conditional_mod = BRW_CONDITIONAL_G; 541 542 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 543 inst->predicated = true; 544 } 545 break; 546 547 case ir_binop_pow: 548 emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); 549 break; 550 551 case ir_unop_bit_not: 552 inst = emit(BRW_OPCODE_NOT, this->result, op[0]); 553 break; 554 case ir_binop_bit_and: 555 inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]); 556 break; 557 case ir_binop_bit_xor: 558 inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); 559 break; 560 case ir_binop_bit_or: 561 inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]); 562 break; 563 564 case ir_binop_lshift: 565 inst = emit(BRW_OPCODE_SHL, this->result, op[0], op[1]); 566 break; 567 568 case ir_binop_rshift: 569 if (ir->type->base_type == GLSL_TYPE_INT) 570 inst = emit(BRW_OPCODE_ASR, this->result, op[0], op[1]); 571 else 572 inst = emit(BRW_OPCODE_SHR, this->result, op[0], op[1]); 573 break; 574 } 575} 576 577void 578fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 579 const glsl_type *type, bool predicated) 580{ 581 switch (type->base_type) { 582 case GLSL_TYPE_FLOAT: 583 case GLSL_TYPE_UINT: 584 case GLSL_TYPE_INT: 585 case GLSL_TYPE_BOOL: 586 for (unsigned int i = 0; i < type->components(); i++) { 587 l.type = brw_type_for_base_type(type); 588 r.type = brw_type_for_base_type(type); 589 590 if (predicated || !l.equals(r)) { 591 fs_inst *inst = emit(BRW_OPCODE_MOV, l, r); 592 inst->predicated = predicated; 593 } 594 595 l.reg_offset++; 596 r.reg_offset++; 597 } 598 break; 599 case GLSL_TYPE_ARRAY: 600 for (unsigned int i = 0; i < type->length; i++) { 601 emit_assignment_writes(l, r, type->fields.array, predicated); 602 } 603 break; 604 605 case GLSL_TYPE_STRUCT: 606 for (unsigned int i = 0; i < type->length; i++) { 607 emit_assignment_writes(l, r, type->fields.structure[i].type, 608 predicated); 609 } 610 break; 611 612 case GLSL_TYPE_SAMPLER: 613 break; 614 615 default: 616 assert(!"not reached"); 617 break; 618 } 619} 620 621/* If the RHS processing resulted in an instruction generating a 622 * temporary value, and it would be easy to rewrite the instruction to 623 * generate its result right into the LHS instead, do so. This ends 624 * up reliably removing instructions where it can be tricky to do so 625 * later without real UD chain information. 626 */ 627bool 628fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, 629 fs_reg dst, 630 fs_reg src, 631 fs_inst *pre_rhs_inst, 632 fs_inst *last_rhs_inst) 633{ 634 /* Only attempt if we're doing a direct assignment. */ 635 if (ir->condition || 636 !(ir->lhs->type->is_scalar() || 637 (ir->lhs->type->is_vector() && 638 ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) 639 return false; 640 641 /* Make sure the last instruction generated our source reg. */ 642 fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, 643 last_rhs_inst, 644 src); 645 if (!modify) 646 return false; 647 648 /* If last_rhs_inst wrote a different number of components than our LHS, 649 * we can't safely rewrite it. 650 */ 651 if (ir->lhs->type->vector_elements != modify->regs_written()) 652 return false; 653 654 /* Success! Rewrite the instruction. */ 655 modify->dst = dst; 656 657 return true; 658} 659 660void 661fs_visitor::visit(ir_assignment *ir) 662{ 663 fs_reg l, r; 664 fs_inst *inst; 665 666 /* FINISHME: arrays on the lhs */ 667 ir->lhs->accept(this); 668 l = this->result; 669 670 fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail(); 671 672 ir->rhs->accept(this); 673 r = this->result; 674 675 fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail(); 676 677 assert(l.file != BAD_FILE); 678 assert(r.file != BAD_FILE); 679 680 if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) 681 return; 682 683 if (ir->condition) { 684 emit_bool_to_cond_code(ir->condition); 685 } 686 687 if (ir->lhs->type->is_scalar() || 688 ir->lhs->type->is_vector()) { 689 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 690 if (ir->write_mask & (1 << i)) { 691 inst = emit(BRW_OPCODE_MOV, l, r); 692 if (ir->condition) 693 inst->predicated = true; 694 r.reg_offset++; 695 } 696 l.reg_offset++; 697 } 698 } else { 699 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 700 } 701} 702 703fs_inst * 704fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 705 fs_reg shadow_c, fs_reg lod, fs_reg dPdy, 706 int sampler) 707{ 708 int mlen; 709 int base_mrf = 1; 710 bool simd16 = false; 711 fs_reg orig_dst; 712 713 /* g0 header. */ 714 mlen = 1; 715 716 if (ir->shadow_comparitor) { 717 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 718 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 719 coordinate.reg_offset++; 720 } 721 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 722 mlen += 3; 723 724 if (ir->op == ir_tex) { 725 /* There's no plain shadow compare message, so we use shadow 726 * compare with a bias of 0.0. 727 */ 728 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); 729 mlen++; 730 } else if (ir->op == ir_txb || ir->op == ir_txl) { 731 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 732 mlen++; 733 } else { 734 assert(!"Should not get here."); 735 } 736 737 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), shadow_c); 738 mlen++; 739 } else if (ir->op == ir_tex) { 740 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 741 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 742 coordinate.reg_offset++; 743 } 744 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 745 mlen += 3; 746 } else if (ir->op == ir_txd) { 747 fs_reg &dPdx = lod; 748 749 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 750 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 751 coordinate.reg_offset++; 752 } 753 /* the slots for u and v are always present, but r is optional */ 754 mlen += MAX2(ir->coordinate->type->vector_elements, 2); 755 756 /* P = u, v, r 757 * dPdx = dudx, dvdx, drdx 758 * dPdy = dudy, dvdy, drdy 759 * 760 * 1-arg: Does not exist. 761 * 762 * 2-arg: dudx dvdx dudy dvdy 763 * dPdx.x dPdx.y dPdy.x dPdy.y 764 * m4 m5 m6 m7 765 * 766 * 3-arg: dudx dvdx drdx dudy dvdy drdy 767 * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z 768 * m5 m6 m7 m8 m9 m10 769 */ 770 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { 771 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 772 dPdx.reg_offset++; 773 } 774 mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); 775 776 for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { 777 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 778 dPdy.reg_offset++; 779 } 780 mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); 781 } else if (ir->op == ir_txs) { 782 /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ 783 simd16 = true; 784 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); 785 mlen += 2; 786 } else { 787 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 788 * instructions. We'll need to do SIMD16 here. 789 */ 790 simd16 = true; 791 assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf); 792 793 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 794 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), 795 coordinate); 796 coordinate.reg_offset++; 797 } 798 799 /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to 800 * be necessary for TXF (ld), but seems wise to do for all messages. 801 */ 802 for (int i = ir->coordinate->type->vector_elements; i < 3; i++) { 803 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)); 804 } 805 806 /* lod/bias appears after u/v/r. */ 807 mlen += 6; 808 809 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, lod.type), lod); 810 mlen++; 811 812 /* The unused upper half. */ 813 mlen++; 814 } 815 816 if (simd16) { 817 /* Now, since we're doing simd16, the return is 2 interleaved 818 * vec4s where the odd-indexed ones are junk. We'll need to move 819 * this weirdness around to the expected layout. 820 */ 821 orig_dst = dst; 822 const glsl_type *vec_type = 823 glsl_type::get_instance(ir->type->base_type, 4, 1); 824 dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2)); 825 dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type) 826 : BRW_REGISTER_TYPE_F; 827 } 828 829 fs_inst *inst = NULL; 830 switch (ir->op) { 831 case ir_tex: 832 inst = emit(SHADER_OPCODE_TEX, dst); 833 break; 834 case ir_txb: 835 inst = emit(FS_OPCODE_TXB, dst); 836 break; 837 case ir_txl: 838 inst = emit(SHADER_OPCODE_TXL, dst); 839 break; 840 case ir_txd: 841 inst = emit(SHADER_OPCODE_TXD, dst); 842 break; 843 case ir_txs: 844 inst = emit(SHADER_OPCODE_TXS, dst); 845 break; 846 case ir_txf: 847 inst = emit(SHADER_OPCODE_TXF, dst); 848 break; 849 } 850 inst->base_mrf = base_mrf; 851 inst->mlen = mlen; 852 inst->header_present = true; 853 854 if (simd16) { 855 for (int i = 0; i < 4; i++) { 856 emit(BRW_OPCODE_MOV, orig_dst, dst); 857 orig_dst.reg_offset++; 858 dst.reg_offset += 2; 859 } 860 } 861 862 return inst; 863} 864 865/* gen5's sampler has slots for u, v, r, array index, then optional 866 * parameters like shadow comparitor or LOD bias. If optional 867 * parameters aren't present, those base slots are optional and don't 868 * need to be included in the message. 869 * 870 * We don't fill in the unnecessary slots regardless, which may look 871 * surprising in the disassembly. 872 */ 873fs_inst * 874fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 875 fs_reg shadow_c, fs_reg lod, fs_reg lod2, 876 int sampler) 877{ 878 int mlen = 0; 879 int base_mrf = 2; 880 int reg_width = c->dispatch_width / 8; 881 bool header_present = false; 882 const int vector_elements = 883 ir->coordinate ? ir->coordinate->type->vector_elements : 0; 884 885 if (ir->offset != NULL && ir->op == ir_txf) { 886 /* It appears that the ld instruction used for txf does its 887 * address bounds check before adding in the offset. To work 888 * around this, just add the integer offset to the integer texel 889 * coordinate, and don't put the offset in the header. 890 */ 891 ir_constant *offset = ir->offset->as_constant(); 892 for (int i = 0; i < vector_elements; i++) { 893 emit(BRW_OPCODE_ADD, 894 fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), 895 coordinate, 896 offset->value.i[i]); 897 coordinate.reg_offset++; 898 } 899 } else { 900 if (ir->offset) { 901 /* The offsets set up by the ir_texture visitor are in the 902 * m1 header, so we can't go headerless. 903 */ 904 header_present = true; 905 mlen++; 906 base_mrf--; 907 } 908 909 for (int i = 0; i < vector_elements; i++) { 910 emit(BRW_OPCODE_MOV, 911 fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), 912 coordinate); 913 coordinate.reg_offset++; 914 } 915 } 916 mlen += vector_elements * reg_width; 917 918 if (ir->shadow_comparitor) { 919 mlen = MAX2(mlen, header_present + 4 * reg_width); 920 921 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), shadow_c); 922 mlen += reg_width; 923 } 924 925 fs_inst *inst = NULL; 926 switch (ir->op) { 927 case ir_tex: 928 inst = emit(SHADER_OPCODE_TEX, dst); 929 break; 930 case ir_txb: 931 mlen = MAX2(mlen, header_present + 4 * reg_width); 932 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 933 mlen += reg_width; 934 935 inst = emit(FS_OPCODE_TXB, dst); 936 break; 937 case ir_txl: 938 mlen = MAX2(mlen, header_present + 4 * reg_width); 939 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 940 mlen += reg_width; 941 942 inst = emit(SHADER_OPCODE_TXL, dst); 943 break; 944 case ir_txd: { 945 mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */ 946 947 /** 948 * P = u, v, r 949 * dPdx = dudx, dvdx, drdx 950 * dPdy = dudy, dvdy, drdy 951 * 952 * Load up these values: 953 * - dudx dudy dvdx dvdy drdx drdy 954 * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z 955 */ 956 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { 957 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 958 lod.reg_offset++; 959 mlen += reg_width; 960 961 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod2); 962 lod2.reg_offset++; 963 mlen += reg_width; 964 } 965 966 inst = emit(SHADER_OPCODE_TXD, dst); 967 break; 968 } 969 case ir_txs: 970 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); 971 mlen += reg_width; 972 inst = emit(SHADER_OPCODE_TXS, dst); 973 break; 974 case ir_txf: 975 mlen = header_present + 4 * reg_width; 976 977 emit(BRW_OPCODE_MOV, 978 fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), 979 lod); 980 inst = emit(SHADER_OPCODE_TXF, dst); 981 break; 982 } 983 inst->base_mrf = base_mrf; 984 inst->mlen = mlen; 985 inst->header_present = header_present; 986 987 if (mlen > 11) { 988 fail("Message length >11 disallowed by hardware\n"); 989 } 990 991 return inst; 992} 993 994fs_inst * 995fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 996 fs_reg shadow_c, fs_reg lod, fs_reg lod2, 997 int sampler) 998{ 999 int mlen = 0; 1000 int base_mrf = 2; 1001 int reg_width = c->dispatch_width / 8; 1002 bool header_present = false; 1003 int offsets[3]; 1004 1005 if (ir->offset && ir->op != ir_txf) { 1006 /* The offsets set up by the ir_texture visitor are in the 1007 * m1 header, so we can't go headerless. 1008 */ 1009 header_present = true; 1010 mlen++; 1011 base_mrf--; 1012 } 1013 1014 if (ir->shadow_comparitor) { 1015 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), shadow_c); 1016 mlen += reg_width; 1017 } 1018 1019 /* Set up the LOD info */ 1020 switch (ir->op) { 1021 case ir_tex: 1022 break; 1023 case ir_txb: 1024 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 1025 mlen += reg_width; 1026 break; 1027 case ir_txl: 1028 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 1029 mlen += reg_width; 1030 break; 1031 case ir_txd: { 1032 if (c->dispatch_width == 16) 1033 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); 1034 1035 /* Load dPdx and the coordinate together: 1036 * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z 1037 */ 1038 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1039 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); 1040 coordinate.reg_offset++; 1041 mlen += reg_width; 1042 1043 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 1044 lod.reg_offset++; 1045 mlen += reg_width; 1046 1047 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod2); 1048 lod2.reg_offset++; 1049 mlen += reg_width; 1050 } 1051 break; 1052 } 1053 case ir_txs: 1054 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); 1055 mlen += reg_width; 1056 break; 1057 case ir_txf: 1058 /* It appears that the ld instruction used for txf does its 1059 * address bounds check before adding in the offset. To work 1060 * around this, just add the integer offset to the integer texel 1061 * coordinate, and don't put the offset in the header. 1062 */ 1063 if (ir->offset) { 1064 ir_constant *offset = ir->offset->as_constant(); 1065 offsets[0] = offset->value.i[0]; 1066 offsets[1] = offset->value.i[1]; 1067 offsets[2] = offset->value.i[2]; 1068 } else { 1069 memset(offsets, 0, sizeof(offsets)); 1070 } 1071 1072 /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */ 1073 emit(BRW_OPCODE_ADD, 1074 fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[0]); 1075 coordinate.reg_offset++; 1076 mlen += reg_width; 1077 1078 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod); 1079 mlen += reg_width; 1080 1081 for (int i = 1; i < ir->coordinate->type->vector_elements; i++) { 1082 emit(BRW_OPCODE_ADD, 1083 fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[i]); 1084 coordinate.reg_offset++; 1085 mlen += reg_width; 1086 } 1087 break; 1088 } 1089 1090 /* Set up the coordinate (except for cases where it was done above) */ 1091 if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf) { 1092 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1093 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); 1094 coordinate.reg_offset++; 1095 mlen += reg_width; 1096 } 1097 } 1098 1099 /* Generate the SEND */ 1100 fs_inst *inst = NULL; 1101 switch (ir->op) { 1102 case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break; 1103 case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break; 1104 case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break; 1105 case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break; 1106 case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break; 1107 case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break; 1108 } 1109 inst->base_mrf = base_mrf; 1110 inst->mlen = mlen; 1111 inst->header_present = header_present; 1112 1113 if (mlen > 11) { 1114 fail("Message length >11 disallowed by hardware\n"); 1115 } 1116 1117 return inst; 1118} 1119 1120/** 1121 * Emit code to produce the coordinates for a texture lookup. 1122 * 1123 * Returns the fs_reg containing the texture coordinate (as opposed to 1124 * setting this->result). 1125 */ 1126fs_reg 1127fs_visitor::emit_texcoord(ir_texture *ir, int sampler) 1128{ 1129 fs_inst *inst = NULL; 1130 1131 if (!ir->coordinate) 1132 return fs_reg(); /* Return the default BAD_FILE register. */ 1133 1134 ir->coordinate->accept(this); 1135 fs_reg coordinate = this->result; 1136 1137 bool needs_gl_clamp = true; 1138 1139 fs_reg scale_x, scale_y; 1140 1141 /* The 965 requires the EU to do the normalization of GL rectangle 1142 * texture coordinates. We use the program parameter state 1143 * tracking to get the scaling factor. 1144 */ 1145 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT && 1146 (intel->gen < 6 || 1147 (intel->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) || 1148 c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) { 1149 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1150 int tokens[STATE_LENGTH] = { 1151 STATE_INTERNAL, 1152 STATE_TEXRECT_SCALE, 1153 sampler, 1154 0, 1155 0 1156 }; 1157 1158 if (c->dispatch_width == 16) { 1159 fail("rectangle scale uniform setup not supported on 16-wide\n"); 1160 return fs_reg(this, ir->type); 1161 } 1162 1163 scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1164 scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1165 1166 GLuint index = _mesa_add_state_reference(params, 1167 (gl_state_index *)tokens); 1168 1169 this->param_index[c->prog_data.nr_params] = index; 1170 this->param_offset[c->prog_data.nr_params] = 0; 1171 c->prog_data.nr_params++; 1172 this->param_index[c->prog_data.nr_params] = index; 1173 this->param_offset[c->prog_data.nr_params] = 1; 1174 c->prog_data.nr_params++; 1175 } 1176 1177 /* The 965 requires the EU to do the normalization of GL rectangle 1178 * texture coordinates. We use the program parameter state 1179 * tracking to get the scaling factor. 1180 */ 1181 if (intel->gen < 6 && 1182 ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1183 fs_reg dst = fs_reg(this, ir->coordinate->type); 1184 fs_reg src = coordinate; 1185 coordinate = dst; 1186 1187 emit(BRW_OPCODE_MUL, dst, src, scale_x); 1188 dst.reg_offset++; 1189 src.reg_offset++; 1190 emit(BRW_OPCODE_MUL, dst, src, scale_y); 1191 } else if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1192 /* On gen6+, the sampler handles the rectangle coordinates 1193 * natively, without needing rescaling. But that means we have 1194 * to do GL_CLAMP clamping at the [0, width], [0, height] scale, 1195 * not [0, 1] like the default case below. 1196 */ 1197 needs_gl_clamp = false; 1198 1199 for (int i = 0; i < 2; i++) { 1200 if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) { 1201 fs_reg chan = coordinate; 1202 chan.reg_offset += i; 1203 1204 inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0)); 1205 inst->conditional_mod = BRW_CONDITIONAL_G; 1206 1207 /* Our parameter comes in as 1.0/width or 1.0/height, 1208 * because that's what people normally want for doing 1209 * texture rectangle handling. We need width or height 1210 * for clamping, but we don't care enough to make a new 1211 * parameter type, so just invert back. 1212 */ 1213 fs_reg limit = fs_reg(this, glsl_type::float_type); 1214 emit(BRW_OPCODE_MOV, limit, i == 0 ? scale_x : scale_y); 1215 emit(SHADER_OPCODE_RCP, limit, limit); 1216 1217 inst = emit(BRW_OPCODE_SEL, chan, chan, limit); 1218 inst->conditional_mod = BRW_CONDITIONAL_L; 1219 } 1220 } 1221 } 1222 1223 if (ir->coordinate && needs_gl_clamp) { 1224 for (unsigned int i = 0; 1225 i < MIN2(ir->coordinate->type->vector_elements, 3); i++) { 1226 if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) { 1227 fs_reg chan = coordinate; 1228 chan.reg_offset += i; 1229 1230 fs_inst *inst = emit(BRW_OPCODE_MOV, chan, chan); 1231 inst->saturate = true; 1232 } 1233 } 1234 } 1235 return coordinate; 1236} 1237 1238void 1239fs_visitor::visit(ir_texture *ir) 1240{ 1241 fs_inst *inst = NULL; 1242 1243 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &fp->Base); 1244 sampler = fp->Base.SamplerUnits[sampler]; 1245 1246 /* Should be lowered by do_lower_texture_projection */ 1247 assert(!ir->projector); 1248 1249 /* Generate code to compute all the subexpression trees. This has to be 1250 * done before loading any values into MRFs for the sampler message since 1251 * generating these values may involve SEND messages that need the MRFs. 1252 */ 1253 fs_reg coordinate = emit_texcoord(ir, sampler); 1254 1255 fs_reg shadow_comparitor; 1256 if (ir->shadow_comparitor) { 1257 ir->shadow_comparitor->accept(this); 1258 shadow_comparitor = this->result; 1259 } 1260 1261 fs_reg lod, lod2; 1262 switch (ir->op) { 1263 case ir_tex: 1264 break; 1265 case ir_txb: 1266 ir->lod_info.bias->accept(this); 1267 lod = this->result; 1268 break; 1269 case ir_txd: 1270 ir->lod_info.grad.dPdx->accept(this); 1271 lod = this->result; 1272 1273 ir->lod_info.grad.dPdy->accept(this); 1274 lod2 = this->result; 1275 break; 1276 case ir_txf: 1277 case ir_txl: 1278 case ir_txs: 1279 ir->lod_info.lod->accept(this); 1280 lod = this->result; 1281 break; 1282 }; 1283 1284 /* Writemasking doesn't eliminate channels on SIMD8 texture 1285 * samples, so don't worry about them. 1286 */ 1287 fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1)); 1288 1289 if (intel->gen >= 7) { 1290 inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor, 1291 lod, lod2, sampler); 1292 } else if (intel->gen >= 5) { 1293 inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor, 1294 lod, lod2, sampler); 1295 } else { 1296 inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor, 1297 lod, lod2, sampler); 1298 } 1299 1300 /* The header is set up by generate_tex() when necessary. */ 1301 inst->src[0] = reg_undef; 1302 1303 if (ir->offset != NULL && ir->op != ir_txf) 1304 inst->texture_offset = brw_texture_offset(ir->offset->as_constant()); 1305 1306 inst->sampler = sampler; 1307 1308 if (ir->shadow_comparitor) 1309 inst->shadow_compare = true; 1310 1311 swizzle_result(ir, dst, sampler); 1312} 1313 1314/** 1315 * Swizzle the result of a texture result. This is necessary for 1316 * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. 1317 */ 1318void 1319fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) 1320{ 1321 this->result = orig_val; 1322 1323 if (ir->op == ir_txs) 1324 return; 1325 1326 if (ir->type == glsl_type::float_type) { 1327 /* Ignore DEPTH_TEXTURE_MODE swizzling. */ 1328 assert(ir->sampler->type->sampler_shadow); 1329 } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) { 1330 fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type); 1331 1332 for (int i = 0; i < 4; i++) { 1333 int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i); 1334 fs_reg l = swizzled_result; 1335 l.reg_offset += i; 1336 1337 if (swiz == SWIZZLE_ZERO) { 1338 emit(BRW_OPCODE_MOV, l, fs_reg(0.0f)); 1339 } else if (swiz == SWIZZLE_ONE) { 1340 emit(BRW_OPCODE_MOV, l, fs_reg(1.0f)); 1341 } else { 1342 fs_reg r = orig_val; 1343 r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i); 1344 emit(BRW_OPCODE_MOV, l, r); 1345 } 1346 } 1347 this->result = swizzled_result; 1348 } 1349} 1350 1351void 1352fs_visitor::visit(ir_swizzle *ir) 1353{ 1354 ir->val->accept(this); 1355 fs_reg val = this->result; 1356 1357 if (ir->type->vector_elements == 1) { 1358 this->result.reg_offset += ir->mask.x; 1359 return; 1360 } 1361 1362 fs_reg result = fs_reg(this, ir->type); 1363 this->result = result; 1364 1365 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1366 fs_reg channel = val; 1367 int swiz = 0; 1368 1369 switch (i) { 1370 case 0: 1371 swiz = ir->mask.x; 1372 break; 1373 case 1: 1374 swiz = ir->mask.y; 1375 break; 1376 case 2: 1377 swiz = ir->mask.z; 1378 break; 1379 case 3: 1380 swiz = ir->mask.w; 1381 break; 1382 } 1383 1384 channel.reg_offset += swiz; 1385 emit(BRW_OPCODE_MOV, result, channel); 1386 result.reg_offset++; 1387 } 1388} 1389 1390void 1391fs_visitor::visit(ir_discard *ir) 1392{ 1393 assert(ir->condition == NULL); /* FINISHME */ 1394 1395 emit(FS_OPCODE_DISCARD); 1396} 1397 1398void 1399fs_visitor::visit(ir_constant *ir) 1400{ 1401 /* Set this->result to reg at the bottom of the function because some code 1402 * paths will cause this visitor to be applied to other fields. This will 1403 * cause the value stored in this->result to be modified. 1404 * 1405 * Make reg constant so that it doesn't get accidentally modified along the 1406 * way. Yes, I actually had this problem. :( 1407 */ 1408 const fs_reg reg(this, ir->type); 1409 fs_reg dst_reg = reg; 1410 1411 if (ir->type->is_array()) { 1412 const unsigned size = type_size(ir->type->fields.array); 1413 1414 for (unsigned i = 0; i < ir->type->length; i++) { 1415 ir->array_elements[i]->accept(this); 1416 fs_reg src_reg = this->result; 1417 1418 dst_reg.type = src_reg.type; 1419 for (unsigned j = 0; j < size; j++) { 1420 emit(BRW_OPCODE_MOV, dst_reg, src_reg); 1421 src_reg.reg_offset++; 1422 dst_reg.reg_offset++; 1423 } 1424 } 1425 } else if (ir->type->is_record()) { 1426 foreach_list(node, &ir->components) { 1427 ir_constant *const field = (ir_constant *) node; 1428 const unsigned size = type_size(field->type); 1429 1430 field->accept(this); 1431 fs_reg src_reg = this->result; 1432 1433 dst_reg.type = src_reg.type; 1434 for (unsigned j = 0; j < size; j++) { 1435 emit(BRW_OPCODE_MOV, dst_reg, src_reg); 1436 src_reg.reg_offset++; 1437 dst_reg.reg_offset++; 1438 } 1439 } 1440 } else { 1441 const unsigned size = type_size(ir->type); 1442 1443 for (unsigned i = 0; i < size; i++) { 1444 switch (ir->type->base_type) { 1445 case GLSL_TYPE_FLOAT: 1446 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])); 1447 break; 1448 case GLSL_TYPE_UINT: 1449 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])); 1450 break; 1451 case GLSL_TYPE_INT: 1452 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])); 1453 break; 1454 case GLSL_TYPE_BOOL: 1455 emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])); 1456 break; 1457 default: 1458 assert(!"Non-float/uint/int/bool constant"); 1459 } 1460 dst_reg.reg_offset++; 1461 } 1462 } 1463 1464 this->result = reg; 1465} 1466 1467void 1468fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 1469{ 1470 ir_expression *expr = ir->as_expression(); 1471 1472 if (expr) { 1473 fs_reg op[2]; 1474 fs_inst *inst; 1475 1476 assert(expr->get_num_operands() <= 2); 1477 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1478 assert(expr->operands[i]->type->is_scalar()); 1479 1480 expr->operands[i]->accept(this); 1481 op[i] = this->result; 1482 1483 resolve_ud_negate(&op[i]); 1484 } 1485 1486 switch (expr->operation) { 1487 case ir_unop_logic_not: 1488 inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)); 1489 inst->conditional_mod = BRW_CONDITIONAL_Z; 1490 break; 1491 1492 case ir_binop_logic_xor: 1493 case ir_binop_logic_or: 1494 case ir_binop_logic_and: 1495 goto out; 1496 1497 case ir_unop_f2b: 1498 if (intel->gen >= 6) { 1499 inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f)); 1500 } else { 1501 inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]); 1502 } 1503 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1504 break; 1505 1506 case ir_unop_i2b: 1507 if (intel->gen >= 6) { 1508 inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)); 1509 } else { 1510 inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]); 1511 } 1512 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1513 break; 1514 1515 case ir_binop_greater: 1516 case ir_binop_gequal: 1517 case ir_binop_less: 1518 case ir_binop_lequal: 1519 case ir_binop_equal: 1520 case ir_binop_all_equal: 1521 case ir_binop_nequal: 1522 case ir_binop_any_nequal: 1523 resolve_bool_comparison(expr->operands[0], &op[0]); 1524 resolve_bool_comparison(expr->operands[1], &op[1]); 1525 1526 inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]); 1527 inst->conditional_mod = 1528 brw_conditional_for_comparison(expr->operation); 1529 break; 1530 1531 default: 1532 assert(!"not reached"); 1533 fail("bad cond code\n"); 1534 break; 1535 } 1536 return; 1537 } 1538 1539out: 1540 ir->accept(this); 1541 1542 fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1)); 1543 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1544} 1545 1546/** 1547 * Emit a gen6 IF statement with the comparison folded into the IF 1548 * instruction. 1549 */ 1550void 1551fs_visitor::emit_if_gen6(ir_if *ir) 1552{ 1553 ir_expression *expr = ir->condition->as_expression(); 1554 1555 if (expr) { 1556 fs_reg op[2]; 1557 fs_inst *inst; 1558 fs_reg temp; 1559 1560 assert(expr->get_num_operands() <= 2); 1561 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1562 assert(expr->operands[i]->type->is_scalar()); 1563 1564 expr->operands[i]->accept(this); 1565 op[i] = this->result; 1566 } 1567 1568 switch (expr->operation) { 1569 case ir_unop_logic_not: 1570 inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0)); 1571 inst->conditional_mod = BRW_CONDITIONAL_Z; 1572 return; 1573 1574 case ir_binop_logic_xor: 1575 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); 1576 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1577 return; 1578 1579 case ir_binop_logic_or: 1580 temp = fs_reg(this, glsl_type::bool_type); 1581 emit(BRW_OPCODE_OR, temp, op[0], op[1]); 1582 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); 1583 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1584 return; 1585 1586 case ir_binop_logic_and: 1587 temp = fs_reg(this, glsl_type::bool_type); 1588 emit(BRW_OPCODE_AND, temp, op[0], op[1]); 1589 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); 1590 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1591 return; 1592 1593 case ir_unop_f2b: 1594 inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); 1595 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1596 return; 1597 1598 case ir_unop_i2b: 1599 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); 1600 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1601 return; 1602 1603 case ir_binop_greater: 1604 case ir_binop_gequal: 1605 case ir_binop_less: 1606 case ir_binop_lequal: 1607 case ir_binop_equal: 1608 case ir_binop_all_equal: 1609 case ir_binop_nequal: 1610 case ir_binop_any_nequal: 1611 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); 1612 inst->conditional_mod = 1613 brw_conditional_for_comparison(expr->operation); 1614 return; 1615 default: 1616 assert(!"not reached"); 1617 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); 1618 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1619 fail("bad condition\n"); 1620 return; 1621 } 1622 return; 1623 } 1624 1625 ir->condition->accept(this); 1626 1627 fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)); 1628 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1629} 1630 1631void 1632fs_visitor::visit(ir_if *ir) 1633{ 1634 fs_inst *inst; 1635 1636 if (intel->gen < 6 && c->dispatch_width == 16) { 1637 fail("Can't support (non-uniform) control flow on 16-wide\n"); 1638 } 1639 1640 /* Don't point the annotation at the if statement, because then it plus 1641 * the then and else blocks get printed. 1642 */ 1643 this->base_ir = ir->condition; 1644 1645 if (intel->gen == 6) { 1646 emit_if_gen6(ir); 1647 } else { 1648 emit_bool_to_cond_code(ir->condition); 1649 1650 inst = emit(BRW_OPCODE_IF); 1651 inst->predicated = true; 1652 } 1653 1654 foreach_list(node, &ir->then_instructions) { 1655 ir_instruction *ir = (ir_instruction *)node; 1656 this->base_ir = ir; 1657 1658 ir->accept(this); 1659 } 1660 1661 if (!ir->else_instructions.is_empty()) { 1662 emit(BRW_OPCODE_ELSE); 1663 1664 foreach_list(node, &ir->else_instructions) { 1665 ir_instruction *ir = (ir_instruction *)node; 1666 this->base_ir = ir; 1667 1668 ir->accept(this); 1669 } 1670 } 1671 1672 emit(BRW_OPCODE_ENDIF); 1673} 1674 1675void 1676fs_visitor::visit(ir_loop *ir) 1677{ 1678 fs_reg counter = reg_undef; 1679 1680 if (intel->gen < 6 && c->dispatch_width == 16) { 1681 fail("Can't support (non-uniform) control flow on 16-wide\n"); 1682 } 1683 1684 if (ir->counter) { 1685 this->base_ir = ir->counter; 1686 ir->counter->accept(this); 1687 counter = *(variable_storage(ir->counter)); 1688 1689 if (ir->from) { 1690 this->base_ir = ir->from; 1691 ir->from->accept(this); 1692 1693 emit(BRW_OPCODE_MOV, counter, this->result); 1694 } 1695 } 1696 1697 this->base_ir = NULL; 1698 emit(BRW_OPCODE_DO); 1699 1700 if (ir->to) { 1701 this->base_ir = ir->to; 1702 ir->to->accept(this); 1703 1704 fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result); 1705 inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); 1706 1707 inst = emit(BRW_OPCODE_BREAK); 1708 inst->predicated = true; 1709 } 1710 1711 foreach_list(node, &ir->body_instructions) { 1712 ir_instruction *ir = (ir_instruction *)node; 1713 1714 this->base_ir = ir; 1715 ir->accept(this); 1716 } 1717 1718 if (ir->increment) { 1719 this->base_ir = ir->increment; 1720 ir->increment->accept(this); 1721 emit(BRW_OPCODE_ADD, counter, counter, this->result); 1722 } 1723 1724 this->base_ir = NULL; 1725 emit(BRW_OPCODE_WHILE); 1726} 1727 1728void 1729fs_visitor::visit(ir_loop_jump *ir) 1730{ 1731 switch (ir->mode) { 1732 case ir_loop_jump::jump_break: 1733 emit(BRW_OPCODE_BREAK); 1734 break; 1735 case ir_loop_jump::jump_continue: 1736 emit(BRW_OPCODE_CONTINUE); 1737 break; 1738 } 1739} 1740 1741void 1742fs_visitor::visit(ir_call *ir) 1743{ 1744 assert(!"FINISHME"); 1745} 1746 1747void 1748fs_visitor::visit(ir_return *ir) 1749{ 1750 assert(!"FINISHME"); 1751} 1752 1753void 1754fs_visitor::visit(ir_function *ir) 1755{ 1756 /* Ignore function bodies other than main() -- we shouldn't see calls to 1757 * them since they should all be inlined before we get to ir_to_mesa. 1758 */ 1759 if (strcmp(ir->name, "main") == 0) { 1760 const ir_function_signature *sig; 1761 exec_list empty; 1762 1763 sig = ir->matching_signature(&empty); 1764 1765 assert(sig); 1766 1767 foreach_list(node, &sig->body) { 1768 ir_instruction *ir = (ir_instruction *)node; 1769 this->base_ir = ir; 1770 1771 ir->accept(this); 1772 } 1773 } 1774} 1775 1776void 1777fs_visitor::visit(ir_function_signature *ir) 1778{ 1779 assert(!"not reached"); 1780 (void)ir; 1781} 1782 1783fs_inst * 1784fs_visitor::emit(fs_inst inst) 1785{ 1786 fs_inst *list_inst = new(mem_ctx) fs_inst; 1787 *list_inst = inst; 1788 1789 if (force_uncompressed_stack > 0) 1790 list_inst->force_uncompressed = true; 1791 else if (force_sechalf_stack > 0) 1792 list_inst->force_sechalf = true; 1793 1794 list_inst->annotation = this->current_annotation; 1795 list_inst->ir = this->base_ir; 1796 1797 this->instructions.push_tail(list_inst); 1798 1799 return list_inst; 1800} 1801 1802/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1803void 1804fs_visitor::emit_dummy_fs() 1805{ 1806 int reg_width = c->dispatch_width / 8; 1807 1808 /* Everyone's favorite color. */ 1809 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)); 1810 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f)); 1811 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f)); 1812 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f)); 1813 1814 fs_inst *write; 1815 write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); 1816 write->base_mrf = 2; 1817 write->mlen = 4 * reg_width; 1818 write->eot = true; 1819} 1820 1821/* The register location here is relative to the start of the URB 1822 * data. It will get adjusted to be a real location before 1823 * generate_code() time. 1824 */ 1825struct brw_reg 1826fs_visitor::interp_reg(int location, int channel) 1827{ 1828 int regnr = urb_setup[location] * 2 + channel / 2; 1829 int stride = (channel & 1) * 4; 1830 1831 assert(urb_setup[location] != -1); 1832 1833 return brw_vec1_grf(regnr, stride); 1834} 1835 1836/** Emits the interpolation for the varying inputs. */ 1837void 1838fs_visitor::emit_interpolation_setup_gen4() 1839{ 1840 this->current_annotation = "compute pixel centers"; 1841 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1842 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1843 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1844 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1845 1846 emit(FS_OPCODE_PIXEL_X, this->pixel_x); 1847 emit(FS_OPCODE_PIXEL_Y, this->pixel_y); 1848 1849 this->current_annotation = "compute pixel deltas from v0"; 1850 if (brw->has_pln) { 1851 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1852 fs_reg(this, glsl_type::vec2_type); 1853 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1854 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; 1855 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++; 1856 } else { 1857 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1858 fs_reg(this, glsl_type::float_type); 1859 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1860 fs_reg(this, glsl_type::float_type); 1861 } 1862 emit(BRW_OPCODE_ADD, this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1863 this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))); 1864 emit(BRW_OPCODE_ADD, this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1865 this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); 1866 1867 this->current_annotation = "compute pos.w and 1/pos.w"; 1868 /* Compute wpos.w. It's always in our setup, since it's needed to 1869 * interpolate the other attributes. 1870 */ 1871 this->wpos_w = fs_reg(this, glsl_type::float_type); 1872 emit(FS_OPCODE_LINTERP, wpos_w, 1873 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1874 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1875 interp_reg(FRAG_ATTRIB_WPOS, 3)); 1876 /* Compute the pixel 1/W value from wpos.w. */ 1877 this->pixel_w = fs_reg(this, glsl_type::float_type); 1878 emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); 1879 this->current_annotation = NULL; 1880} 1881 1882/** Emits the interpolation for the varying inputs. */ 1883void 1884fs_visitor::emit_interpolation_setup_gen6() 1885{ 1886 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1887 1888 /* If the pixel centers end up used, the setup is the same as for gen4. */ 1889 this->current_annotation = "compute pixel centers"; 1890 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); 1891 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); 1892 int_pixel_x.type = BRW_REGISTER_TYPE_UW; 1893 int_pixel_y.type = BRW_REGISTER_TYPE_UW; 1894 emit(BRW_OPCODE_ADD, 1895 int_pixel_x, 1896 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1897 fs_reg(brw_imm_v(0x10101010))); 1898 emit(BRW_OPCODE_ADD, 1899 int_pixel_y, 1900 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1901 fs_reg(brw_imm_v(0x11001100))); 1902 1903 /* As of gen6, we can no longer mix float and int sources. We have 1904 * to turn the integer pixel centers into floats for their actual 1905 * use. 1906 */ 1907 this->pixel_x = fs_reg(this, glsl_type::float_type); 1908 this->pixel_y = fs_reg(this, glsl_type::float_type); 1909 emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x); 1910 emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y); 1911 1912 this->current_annotation = "compute pos.w"; 1913 this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); 1914 this->wpos_w = fs_reg(this, glsl_type::float_type); 1915 emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); 1916 1917 for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) { 1918 uint8_t reg = c->barycentric_coord_reg[i]; 1919 this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0)); 1920 this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0)); 1921 } 1922 1923 this->current_annotation = NULL; 1924} 1925 1926void 1927fs_visitor::emit_color_write(int target, int index, int first_color_mrf) 1928{ 1929 int reg_width = c->dispatch_width / 8; 1930 fs_inst *inst; 1931 fs_reg color = outputs[target]; 1932 fs_reg mrf; 1933 1934 /* If there's no color data to be written, skip it. */ 1935 if (color.file == BAD_FILE) 1936 return; 1937 1938 color.reg_offset += index; 1939 1940 if (c->dispatch_width == 8 || intel->gen >= 6) { 1941 /* SIMD8 write looks like: 1942 * m + 0: r0 1943 * m + 1: r1 1944 * m + 2: g0 1945 * m + 3: g1 1946 * 1947 * gen6 SIMD16 DP write looks like: 1948 * m + 0: r0 1949 * m + 1: r1 1950 * m + 2: g0 1951 * m + 3: g1 1952 * m + 4: b0 1953 * m + 5: b1 1954 * m + 6: a0 1955 * m + 7: a1 1956 */ 1957 inst = emit(BRW_OPCODE_MOV, 1958 fs_reg(MRF, first_color_mrf + index * reg_width, color.type), 1959 color); 1960 inst->saturate = c->key.clamp_fragment_color; 1961 } else { 1962 /* pre-gen6 SIMD16 single source DP write looks like: 1963 * m + 0: r0 1964 * m + 1: g0 1965 * m + 2: b0 1966 * m + 3: a0 1967 * m + 4: r1 1968 * m + 5: g1 1969 * m + 6: b1 1970 * m + 7: a1 1971 */ 1972 if (brw->has_compr4) { 1973 /* By setting the high bit of the MRF register number, we 1974 * indicate that we want COMPR4 mode - instead of doing the 1975 * usual destination + 1 for the second half we get 1976 * destination + 4. 1977 */ 1978 inst = emit(BRW_OPCODE_MOV, 1979 fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index, 1980 color.type), 1981 color); 1982 inst->saturate = c->key.clamp_fragment_color; 1983 } else { 1984 push_force_uncompressed(); 1985 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index, 1986 color.type), 1987 color); 1988 inst->saturate = c->key.clamp_fragment_color; 1989 pop_force_uncompressed(); 1990 1991 push_force_sechalf(); 1992 color.sechalf = true; 1993 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4, 1994 color.type), 1995 color); 1996 inst->saturate = c->key.clamp_fragment_color; 1997 pop_force_sechalf(); 1998 color.sechalf = false; 1999 } 2000 } 2001} 2002 2003void 2004fs_visitor::emit_fb_writes() 2005{ 2006 this->current_annotation = "FB write header"; 2007 bool header_present = true; 2008 /* We can potentially have a message length of up to 15, so we have to set 2009 * base_mrf to either 0 or 1 in order to fit in m0..m15. 2010 */ 2011 int base_mrf = 1; 2012 int nr = base_mrf; 2013 int reg_width = c->dispatch_width / 8; 2014 bool do_dual_src = this->dual_src_output.file != BAD_FILE; 2015 2016 if (c->dispatch_width == 16 && do_dual_src) { 2017 fail("GL_ARB_blend_func_extended not yet supported in 16-wide."); 2018 do_dual_src = false; 2019 } 2020 2021 /* From the Sandy Bridge PRM, volume 4, page 198: 2022 * 2023 * "Dispatched Pixel Enables. One bit per pixel indicating 2024 * which pixels were originally enabled when the thread was 2025 * dispatched. This field is only required for the end-of- 2026 * thread message and on all dual-source messages." 2027 */ 2028 if (intel->gen >= 6 && 2029 !this->fp->UsesKill && 2030 !do_dual_src && 2031 c->key.nr_color_regions == 1) { 2032 header_present = false; 2033 } 2034 2035 if (header_present) { 2036 /* m2, m3 header */ 2037 nr += 2; 2038 } 2039 2040 if (c->aa_dest_stencil_reg) { 2041 push_force_uncompressed(); 2042 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2043 fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))); 2044 pop_force_uncompressed(); 2045 } 2046 2047 /* Reserve space for color. It'll be filled in per MRT below. */ 2048 int color_mrf = nr; 2049 nr += 4 * reg_width; 2050 if (do_dual_src) 2051 nr += 4; 2052 2053 if (c->source_depth_to_render_target) { 2054 if (intel->gen == 6 && c->dispatch_width == 16) { 2055 /* For outputting oDepth on gen6, SIMD8 writes have to be 2056 * used. This would require 8-wide moves of each half to 2057 * message regs, kind of like pre-gen5 SIMD16 FB writes. 2058 * Just bail on doing so for now. 2059 */ 2060 fail("Missing support for simd16 depth writes on gen6\n"); 2061 } 2062 2063 if (c->computes_depth) { 2064 /* Hand over gl_FragDepth. */ 2065 assert(this->frag_depth); 2066 fs_reg depth = *(variable_storage(this->frag_depth)); 2067 2068 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth); 2069 } else { 2070 /* Pass through the payload depth. */ 2071 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), 2072 fs_reg(brw_vec8_grf(c->source_depth_reg, 0))); 2073 } 2074 nr += reg_width; 2075 } 2076 2077 if (c->dest_depth_reg) { 2078 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), 2079 fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))); 2080 nr += reg_width; 2081 } 2082 2083 if (do_dual_src) { 2084 fs_reg src0 = this->outputs[0]; 2085 fs_reg src1 = this->dual_src_output; 2086 2087 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2088 "FB write src0"); 2089 for (int i = 0; i < 4; i++) { 2090 fs_inst *inst = emit(BRW_OPCODE_MOV, 2091 fs_reg(MRF, color_mrf + i, src0.type), 2092 src0); 2093 src0.reg_offset++; 2094 inst->saturate = c->key.clamp_fragment_color; 2095 } 2096 2097 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2098 "FB write src1"); 2099 for (int i = 0; i < 4; i++) { 2100 fs_inst *inst = emit(BRW_OPCODE_MOV, 2101 fs_reg(MRF, color_mrf + 4 + i, src1.type), 2102 src1); 2103 src1.reg_offset++; 2104 inst->saturate = c->key.clamp_fragment_color; 2105 } 2106 2107 fs_inst *inst = emit(FS_OPCODE_FB_WRITE); 2108 inst->target = 0; 2109 inst->base_mrf = base_mrf; 2110 inst->mlen = nr - base_mrf; 2111 inst->eot = true; 2112 inst->header_present = header_present; 2113 2114 c->prog_data.dual_src_blend = true; 2115 this->current_annotation = NULL; 2116 return; 2117 } 2118 2119 for (int target = 0; target < c->key.nr_color_regions; target++) { 2120 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2121 "FB write target %d", 2122 target); 2123 for (unsigned i = 0; i < this->output_components[target]; i++) 2124 emit_color_write(target, i, color_mrf); 2125 2126 fs_inst *inst = emit(FS_OPCODE_FB_WRITE); 2127 inst->target = target; 2128 inst->base_mrf = base_mrf; 2129 inst->mlen = nr - base_mrf; 2130 if (target == c->key.nr_color_regions - 1) 2131 inst->eot = true; 2132 inst->header_present = header_present; 2133 } 2134 2135 if (c->key.nr_color_regions == 0) { 2136 /* Even if there's no color buffers enabled, we still need to send 2137 * alpha out the pipeline to our null renderbuffer to support 2138 * alpha-testing, alpha-to-coverage, and so on. 2139 */ 2140 emit_color_write(0, 3, color_mrf); 2141 2142 fs_inst *inst = emit(FS_OPCODE_FB_WRITE); 2143 inst->base_mrf = base_mrf; 2144 inst->mlen = nr - base_mrf; 2145 inst->eot = true; 2146 inst->header_present = header_present; 2147 } 2148 2149 this->current_annotation = NULL; 2150} 2151 2152void 2153fs_visitor::resolve_ud_negate(fs_reg *reg) 2154{ 2155 if (reg->type != BRW_REGISTER_TYPE_UD || 2156 !reg->negate) 2157 return; 2158 2159 fs_reg temp = fs_reg(this, glsl_type::uint_type); 2160 emit(BRW_OPCODE_MOV, temp, *reg); 2161 *reg = temp; 2162} 2163 2164void 2165fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) 2166{ 2167 if (rvalue->type != glsl_type::bool_type) 2168 return; 2169 2170 fs_reg temp = fs_reg(this, glsl_type::bool_type); 2171 emit(BRW_OPCODE_AND, temp, *reg, fs_reg(1)); 2172 *reg = temp; 2173} 2174 2175fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, 2176 struct brw_shader *shader) 2177{ 2178 this->c = c; 2179 this->p = &c->func; 2180 this->brw = p->brw; 2181 this->fp = (struct gl_fragment_program *) 2182 prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; 2183 this->prog = prog; 2184 this->intel = &brw->intel; 2185 this->ctx = &intel->ctx; 2186 this->mem_ctx = ralloc_context(NULL); 2187 this->shader = shader; 2188 this->failed = false; 2189 this->variable_ht = hash_table_ctor(0, 2190 hash_table_pointer_hash, 2191 hash_table_pointer_compare); 2192 2193 /* There's a question that appears to be left open in the spec: 2194 * How do implicit dst conversions interact with the CMP 2195 * instruction or conditional mods? On gen6, the instruction: 2196 * 2197 * CMP null<d> src0<f> src1<f> 2198 * 2199 * will do src1 - src0 and compare that result as if it was an 2200 * integer. On gen4, it will do src1 - src0 as float, convert 2201 * the result to int, and compare as int. In between, it 2202 * appears that it does src1 - src0 and does the compare in the 2203 * execution type so dst type doesn't matter. 2204 */ 2205 if (this->intel->gen > 4) 2206 this->reg_null_cmp = reg_null_d; 2207 else 2208 this->reg_null_cmp = reg_null_f; 2209 2210 this->frag_depth = NULL; 2211 memset(this->outputs, 0, sizeof(this->outputs)); 2212 this->first_non_payload_grf = 0; 2213 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; 2214 2215 this->current_annotation = NULL; 2216 this->base_ir = NULL; 2217 2218 this->virtual_grf_sizes = NULL; 2219 this->virtual_grf_count = 0; 2220 this->virtual_grf_array_size = 0; 2221 this->virtual_grf_def = NULL; 2222 this->virtual_grf_use = NULL; 2223 this->live_intervals_valid = false; 2224 2225 this->force_uncompressed_stack = 0; 2226 this->force_sechalf_stack = 0; 2227} 2228 2229fs_visitor::~fs_visitor() 2230{ 2231 ralloc_free(this->mem_ctx); 2232 hash_table_dtor(this->variable_ht); 2233} 2234