brw_fs_visitor.cpp revision 9544e44262651a51ffdb3a572f99f902807a6205
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file brw_fs_visitor.cpp 25 * 26 * This file supports generating the FS LIR from the GLSL IR. The LIR 27 * makes it easier to do backend-specific optimizations than doing so 28 * in the GLSL IR or in the native code. 29 */ 30extern "C" { 31 32#include <sys/types.h> 33 34#include "main/macros.h" 35#include "main/shaderobj.h" 36#include "main/uniforms.h" 37#include "program/prog_parameter.h" 38#include "program/prog_print.h" 39#include "program/prog_optimize.h" 40#include "program/register_allocate.h" 41#include "program/sampler.h" 42#include "program/hash_table.h" 43#include "brw_context.h" 44#include "brw_eu.h" 45#include "brw_wm.h" 46} 47#include "brw_shader.h" 48#include "brw_fs.h" 49#include "glsl/glsl_types.h" 50#include "glsl/ir_optimization.h" 51#include "glsl/ir_print_visitor.h" 52 53void 54fs_visitor::visit(ir_variable *ir) 55{ 56 fs_reg *reg = NULL; 57 58 if (variable_storage(ir)) 59 return; 60 61 if (ir->mode == ir_var_in) { 62 if (!strcmp(ir->name, "gl_FragCoord")) { 63 reg = emit_fragcoord_interpolation(ir); 64 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 65 reg = emit_frontfacing_interpolation(ir); 66 } else { 67 reg = emit_general_interpolation(ir); 68 } 69 assert(reg); 70 hash_table_insert(this->variable_ht, reg, ir); 71 return; 72 } else if (ir->mode == ir_var_out) { 73 reg = new(this->mem_ctx) fs_reg(this, ir->type); 74 75 if (ir->index > 0) { 76 assert(ir->location == FRAG_RESULT_DATA0); 77 assert(ir->index == 1); 78 this->dual_src_output = *reg; 79 } else if (ir->location == FRAG_RESULT_COLOR) { 80 /* Writing gl_FragColor outputs to all color regions. */ 81 for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { 82 this->outputs[i] = *reg; 83 this->output_components[i] = 4; 84 } 85 } else if (ir->location == FRAG_RESULT_DEPTH) { 86 this->frag_depth = ir; 87 } else { 88 /* gl_FragData or a user-defined FS output */ 89 assert(ir->location >= FRAG_RESULT_DATA0 && 90 ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); 91 92 int vector_elements = 93 ir->type->is_array() ? ir->type->fields.array->vector_elements 94 : ir->type->vector_elements; 95 96 /* General color output. 
*/ 97 for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) { 98 int output = ir->location - FRAG_RESULT_DATA0 + i; 99 this->outputs[output] = *reg; 100 this->outputs[output].reg_offset += vector_elements * i; 101 this->output_components[output] = vector_elements; 102 } 103 } 104 } else if (ir->mode == ir_var_uniform) { 105 int param_index = c->prog_data.nr_params; 106 107 if (c->dispatch_width == 16) { 108 if (!variable_storage(ir)) { 109 fail("Failed to find uniform '%s' in 16-wide\n", ir->name); 110 } 111 return; 112 } 113 114 if (!strncmp(ir->name, "gl_", 3)) { 115 setup_builtin_uniform_values(ir); 116 } else { 117 setup_uniform_values(ir->location, ir->type); 118 } 119 120 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 121 reg->type = brw_type_for_base_type(ir->type); 122 } 123 124 if (!reg) 125 reg = new(this->mem_ctx) fs_reg(this, ir->type); 126 127 hash_table_insert(this->variable_ht, reg, ir); 128} 129 130void 131fs_visitor::visit(ir_dereference_variable *ir) 132{ 133 fs_reg *reg = variable_storage(ir->var); 134 this->result = *reg; 135} 136 137void 138fs_visitor::visit(ir_dereference_record *ir) 139{ 140 const glsl_type *struct_type = ir->record->type; 141 142 ir->record->accept(this); 143 144 unsigned int offset = 0; 145 for (unsigned int i = 0; i < struct_type->length; i++) { 146 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 147 break; 148 offset += type_size(struct_type->fields.structure[i].type); 149 } 150 this->result.reg_offset += offset; 151 this->result.type = brw_type_for_base_type(ir->type); 152} 153 154void 155fs_visitor::visit(ir_dereference_array *ir) 156{ 157 ir_constant *index; 158 int element_size; 159 160 ir->array->accept(this); 161 index = ir->array_index->as_constant(); 162 163 element_size = type_size(ir->type); 164 this->result.type = brw_type_for_base_type(ir->type); 165 166 if (index) { 167 assert(this->result.file == UNIFORM || this->result.file == GRF); 168 this->result.reg_offset += index->value.i[0] * element_size; 169 } else { 170 assert(!"FINISHME: non-constant array element"); 171 } 172} 173 174/* Instruction selection: Produce a MOV.sat instead of 175 * MIN(MAX(val, 0), 1) when possible. 176 */ 177bool 178fs_visitor::try_emit_saturate(ir_expression *ir) 179{ 180 ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); 181 182 if (!sat_val) 183 return false; 184 185 fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail(); 186 187 sat_val->accept(this); 188 fs_reg src = this->result; 189 190 fs_inst *last_inst = (fs_inst *) this->instructions.get_tail(); 191 192 /* If the last instruction from our accept() didn't generate our 193 * src, generate a saturated MOV 194 */ 195 fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); 196 if (!modify || modify->regs_written() != 1) { 197 fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src); 198 inst->saturate = true; 199 } else { 200 modify->saturate = true; 201 this->result = src; 202 } 203 204 205 return true; 206} 207 208bool 209fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) 210{ 211 /* 3-src instructions were introduced in gen6. */ 212 if (intel->gen < 6) 213 return false; 214 215 /* MAD can only handle floating-point data. 
*/ 216 if (ir->type != glsl_type::float_type) 217 return false; 218 219 ir_rvalue *nonmul = ir->operands[1 - mul_arg]; 220 ir_expression *mul = ir->operands[mul_arg]->as_expression(); 221 222 if (!mul || mul->operation != ir_binop_mul) 223 return false; 224 225 if (nonmul->as_constant() || 226 mul->operands[0]->as_constant() || 227 mul->operands[1]->as_constant()) 228 return false; 229 230 nonmul->accept(this); 231 fs_reg src0 = this->result; 232 233 mul->operands[0]->accept(this); 234 fs_reg src1 = this->result; 235 236 mul->operands[1]->accept(this); 237 fs_reg src2 = this->result; 238 239 this->result = fs_reg(this, ir->type); 240 emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); 241 242 return true; 243} 244 245void 246fs_visitor::visit(ir_expression *ir) 247{ 248 unsigned int operand; 249 fs_reg op[2], temp; 250 fs_inst *inst; 251 252 assert(ir->get_num_operands() <= 2); 253 254 if (try_emit_saturate(ir)) 255 return; 256 if (ir->operation == ir_binop_add) { 257 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1)) 258 return; 259 } 260 261 for (operand = 0; operand < ir->get_num_operands(); operand++) { 262 ir->operands[operand]->accept(this); 263 if (this->result.file == BAD_FILE) { 264 ir_print_visitor v; 265 fail("Failed to get tree for expression operand:\n"); 266 ir->operands[operand]->accept(&v); 267 } 268 op[operand] = this->result; 269 270 /* Matrix expression operands should have been broken down to vector 271 * operations already. 272 */ 273 assert(!ir->operands[operand]->type->is_matrix()); 274 /* And then those vector operands should have been broken down to scalar. 275 */ 276 assert(!ir->operands[operand]->type->is_vector()); 277 } 278 279 /* Storage for our result. If our result goes into an assignment, it will 280 * just get copy-propagated out, so no worries. 281 */ 282 this->result = fs_reg(this, ir->type); 283 284 switch (ir->operation) { 285 case ir_unop_logic_not: 286 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 287 * ones complement of the whole register, not just bit 0. 
288 */ 289 emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)); 290 break; 291 case ir_unop_neg: 292 op[0].negate = !op[0].negate; 293 this->result = op[0]; 294 break; 295 case ir_unop_abs: 296 op[0].abs = true; 297 op[0].negate = false; 298 this->result = op[0]; 299 break; 300 case ir_unop_sign: 301 temp = fs_reg(this, ir->type); 302 303 emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)); 304 305 inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); 306 inst->conditional_mod = BRW_CONDITIONAL_G; 307 inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)); 308 inst->predicated = true; 309 310 inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); 311 inst->conditional_mod = BRW_CONDITIONAL_L; 312 inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)); 313 inst->predicated = true; 314 315 break; 316 case ir_unop_rcp: 317 emit_math(SHADER_OPCODE_RCP, this->result, op[0]); 318 break; 319 320 case ir_unop_exp2: 321 emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); 322 break; 323 case ir_unop_log2: 324 emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); 325 break; 326 case ir_unop_exp: 327 case ir_unop_log: 328 assert(!"not reached: should be handled by ir_explog_to_explog2"); 329 break; 330 case ir_unop_sin: 331 case ir_unop_sin_reduced: 332 emit_math(SHADER_OPCODE_SIN, this->result, op[0]); 333 break; 334 case ir_unop_cos: 335 case ir_unop_cos_reduced: 336 emit_math(SHADER_OPCODE_COS, this->result, op[0]); 337 break; 338 339 case ir_unop_dFdx: 340 emit(FS_OPCODE_DDX, this->result, op[0]); 341 break; 342 case ir_unop_dFdy: 343 emit(FS_OPCODE_DDY, this->result, op[0]); 344 break; 345 346 case ir_binop_add: 347 emit(BRW_OPCODE_ADD, this->result, op[0], op[1]); 348 break; 349 case ir_binop_sub: 350 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 351 break; 352 353 case ir_binop_mul: 354 if (ir->type->is_integer()) { 355 /* For integer multiplication, the MUL uses the low 16 bits 356 * of one of the operands (src0 on gen6, src1 on gen7). The 357 * MACH accumulates in the contribution of the upper 16 bits 358 * of that operand. 359 * 360 * FINISHME: Emit just the MUL if we know an operand is small 361 * enough. 362 */ 363 if (intel->gen >= 7 && c->dispatch_width == 16) 364 fail("16-wide explicit accumulator operands unsupported\n"); 365 366 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); 367 368 emit(BRW_OPCODE_MUL, acc, op[0], op[1]); 369 emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]); 370 emit(BRW_OPCODE_MOV, this->result, fs_reg(acc)); 371 } else { 372 emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); 373 } 374 break; 375 case ir_binop_div: 376 if (intel->gen >= 7 && c->dispatch_width == 16) 377 fail("16-wide INTDIV unsupported\n"); 378 379 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ 380 assert(ir->type->is_integer()); 381 emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); 382 break; 383 case ir_binop_mod: 384 if (intel->gen >= 7 && c->dispatch_width == 16) 385 fail("16-wide INTDIV unsupported\n"); 386 387 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ 388 assert(ir->type->is_integer()); 389 emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); 390 break; 391 392 case ir_binop_less: 393 case ir_binop_greater: 394 case ir_binop_lequal: 395 case ir_binop_gequal: 396 case ir_binop_equal: 397 case ir_binop_all_equal: 398 case ir_binop_nequal: 399 case ir_binop_any_nequal: 400 temp = this->result; 401 /* original gen4 does implicit conversion before comparison. 
*/ 402 if (intel->gen < 5) 403 temp.type = op[0].type; 404 405 resolve_ud_negate(&op[0]); 406 resolve_ud_negate(&op[1]); 407 408 resolve_bool_comparison(ir->operands[0], &op[0]); 409 resolve_bool_comparison(ir->operands[1], &op[1]); 410 411 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 412 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 413 break; 414 415 case ir_binop_logic_xor: 416 emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); 417 break; 418 419 case ir_binop_logic_or: 420 emit(BRW_OPCODE_OR, this->result, op[0], op[1]); 421 break; 422 423 case ir_binop_logic_and: 424 emit(BRW_OPCODE_AND, this->result, op[0], op[1]); 425 break; 426 427 case ir_binop_dot: 428 case ir_unop_any: 429 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 430 break; 431 432 case ir_unop_noise: 433 assert(!"not reached: should be handled by lower_noise"); 434 break; 435 436 case ir_quadop_vector: 437 assert(!"not reached: should be handled by lower_quadop_vector"); 438 break; 439 440 case ir_unop_sqrt: 441 emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); 442 break; 443 444 case ir_unop_rsq: 445 emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); 446 break; 447 448 case ir_unop_bitcast_i2f: 449 case ir_unop_bitcast_u2f: 450 op[0].type = BRW_REGISTER_TYPE_F; 451 this->result = op[0]; 452 break; 453 case ir_unop_i2u: 454 case ir_unop_bitcast_f2u: 455 op[0].type = BRW_REGISTER_TYPE_UD; 456 this->result = op[0]; 457 break; 458 case ir_unop_u2i: 459 case ir_unop_bitcast_f2i: 460 op[0].type = BRW_REGISTER_TYPE_D; 461 this->result = op[0]; 462 break; 463 case ir_unop_i2f: 464 case ir_unop_u2f: 465 case ir_unop_f2i: 466 case ir_unop_f2u: 467 emit(BRW_OPCODE_MOV, this->result, op[0]); 468 break; 469 470 case ir_unop_b2i: 471 inst = emit(BRW_OPCODE_AND, this->result, op[0], fs_reg(1)); 472 break; 473 case ir_unop_b2f: 474 temp = fs_reg(this, glsl_type::int_type); 475 emit(BRW_OPCODE_AND, temp, op[0], fs_reg(1)); 476 emit(BRW_OPCODE_MOV, this->result, temp); 477 break; 478 479 case ir_unop_f2b: 480 inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)); 481 inst->conditional_mod = BRW_CONDITIONAL_NZ; 482 emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); 483 break; 484 case ir_unop_i2b: 485 assert(op[0].type == BRW_REGISTER_TYPE_D); 486 487 inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0)); 488 inst->conditional_mod = BRW_CONDITIONAL_NZ; 489 emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); 490 break; 491 492 case ir_unop_trunc: 493 emit(BRW_OPCODE_RNDZ, this->result, op[0]); 494 break; 495 case ir_unop_ceil: 496 op[0].negate = !op[0].negate; 497 inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); 498 this->result.negate = true; 499 break; 500 case ir_unop_floor: 501 inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); 502 break; 503 case ir_unop_fract: 504 inst = emit(BRW_OPCODE_FRC, this->result, op[0]); 505 break; 506 case ir_unop_round_even: 507 emit(BRW_OPCODE_RNDE, this->result, op[0]); 508 break; 509 510 case ir_binop_min: 511 resolve_ud_negate(&op[0]); 512 resolve_ud_negate(&op[1]); 513 514 if (intel->gen >= 6) { 515 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 516 inst->conditional_mod = BRW_CONDITIONAL_L; 517 } else { 518 /* Unalias the destination */ 519 this->result = fs_reg(this, ir->type); 520 521 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); 522 inst->conditional_mod = BRW_CONDITIONAL_L; 523 524 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 525 inst->predicated = true; 526 } 527 break; 528 
case ir_binop_max: 529 resolve_ud_negate(&op[0]); 530 resolve_ud_negate(&op[1]); 531 532 if (intel->gen >= 6) { 533 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 534 inst->conditional_mod = BRW_CONDITIONAL_GE; 535 } else { 536 /* Unalias the destination */ 537 this->result = fs_reg(this, ir->type); 538 539 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); 540 inst->conditional_mod = BRW_CONDITIONAL_G; 541 542 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 543 inst->predicated = true; 544 } 545 break; 546 547 case ir_binop_pow: 548 emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); 549 break; 550 551 case ir_unop_bit_not: 552 inst = emit(BRW_OPCODE_NOT, this->result, op[0]); 553 break; 554 case ir_binop_bit_and: 555 inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]); 556 break; 557 case ir_binop_bit_xor: 558 inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); 559 break; 560 case ir_binop_bit_or: 561 inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]); 562 break; 563 564 case ir_binop_lshift: 565 inst = emit(BRW_OPCODE_SHL, this->result, op[0], op[1]); 566 break; 567 568 case ir_binop_rshift: 569 if (ir->type->base_type == GLSL_TYPE_INT) 570 inst = emit(BRW_OPCODE_ASR, this->result, op[0], op[1]); 571 else 572 inst = emit(BRW_OPCODE_SHR, this->result, op[0], op[1]); 573 break; 574 } 575} 576 577void 578fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 579 const glsl_type *type, bool predicated) 580{ 581 switch (type->base_type) { 582 case GLSL_TYPE_FLOAT: 583 case GLSL_TYPE_UINT: 584 case GLSL_TYPE_INT: 585 case GLSL_TYPE_BOOL: 586 for (unsigned int i = 0; i < type->components(); i++) { 587 l.type = brw_type_for_base_type(type); 588 r.type = brw_type_for_base_type(type); 589 590 if (predicated || !l.equals(r)) { 591 fs_inst *inst = emit(BRW_OPCODE_MOV, l, r); 592 inst->predicated = predicated; 593 } 594 595 l.reg_offset++; 596 r.reg_offset++; 597 } 598 break; 599 case GLSL_TYPE_ARRAY: 600 for (unsigned int i = 0; i < type->length; i++) { 601 emit_assignment_writes(l, r, type->fields.array, predicated); 602 } 603 break; 604 605 case GLSL_TYPE_STRUCT: 606 for (unsigned int i = 0; i < type->length; i++) { 607 emit_assignment_writes(l, r, type->fields.structure[i].type, 608 predicated); 609 } 610 break; 611 612 case GLSL_TYPE_SAMPLER: 613 break; 614 615 default: 616 assert(!"not reached"); 617 break; 618 } 619} 620 621/* If the RHS processing resulted in an instruction generating a 622 * temporary value, and it would be easy to rewrite the instruction to 623 * generate its result right into the LHS instead, do so. This ends 624 * up reliably removing instructions where it can be tricky to do so 625 * later without real UD chain information. 626 */ 627bool 628fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, 629 fs_reg dst, 630 fs_reg src, 631 fs_inst *pre_rhs_inst, 632 fs_inst *last_rhs_inst) 633{ 634 /* Only attempt if we're doing a direct assignment. */ 635 if (ir->condition || 636 !(ir->lhs->type->is_scalar() || 637 (ir->lhs->type->is_vector() && 638 ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) 639 return false; 640 641 /* Make sure the last instruction generated our source reg. */ 642 fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, 643 last_rhs_inst, 644 src); 645 if (!modify) 646 return false; 647 648 /* If last_rhs_inst wrote a different number of components than our LHS, 649 * we can't safely rewrite it. 
650 */ 651 if (ir->lhs->type->vector_elements != modify->regs_written()) 652 return false; 653 654 /* Success! Rewrite the instruction. */ 655 modify->dst = dst; 656 657 return true; 658} 659 660void 661fs_visitor::visit(ir_assignment *ir) 662{ 663 fs_reg l, r; 664 fs_inst *inst; 665 666 /* FINISHME: arrays on the lhs */ 667 ir->lhs->accept(this); 668 l = this->result; 669 670 fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail(); 671 672 ir->rhs->accept(this); 673 r = this->result; 674 675 fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail(); 676 677 assert(l.file != BAD_FILE); 678 assert(r.file != BAD_FILE); 679 680 if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) 681 return; 682 683 if (ir->condition) { 684 emit_bool_to_cond_code(ir->condition); 685 } 686 687 if (ir->lhs->type->is_scalar() || 688 ir->lhs->type->is_vector()) { 689 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 690 if (ir->write_mask & (1 << i)) { 691 inst = emit(BRW_OPCODE_MOV, l, r); 692 if (ir->condition) 693 inst->predicated = true; 694 r.reg_offset++; 695 } 696 l.reg_offset++; 697 } 698 } else { 699 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 700 } 701} 702 703fs_inst * 704fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 705 int sampler) 706{ 707 int mlen; 708 int base_mrf = 1; 709 bool simd16 = false; 710 fs_reg orig_dst; 711 712 /* g0 header. */ 713 mlen = 1; 714 715 if (ir->shadow_comparitor) { 716 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 717 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 718 coordinate.reg_offset++; 719 } 720 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 721 mlen += 3; 722 723 if (ir->op == ir_tex) { 724 /* There's no plain shadow compare message, so we use shadow 725 * compare with a bias of 0.0. 726 */ 727 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); 728 mlen++; 729 } else if (ir->op == ir_txb) { 730 ir->lod_info.bias->accept(this); 731 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 732 mlen++; 733 } else { 734 assert(ir->op == ir_txl); 735 ir->lod_info.lod->accept(this); 736 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 737 mlen++; 738 } 739 740 ir->shadow_comparitor->accept(this); 741 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 742 mlen++; 743 } else if (ir->op == ir_tex) { 744 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 745 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 746 coordinate.reg_offset++; 747 } 748 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 749 mlen += 3; 750 } else if (ir->op == ir_txd) { 751 ir->lod_info.grad.dPdx->accept(this); 752 fs_reg dPdx = this->result; 753 754 ir->lod_info.grad.dPdy->accept(this); 755 fs_reg dPdy = this->result; 756 757 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 758 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 759 coordinate.reg_offset++; 760 } 761 /* the slots for u and v are always present, but r is optional */ 762 mlen += MAX2(ir->coordinate->type->vector_elements, 2); 763 764 /* P = u, v, r 765 * dPdx = dudx, dvdx, drdx 766 * dPdy = dudy, dvdy, drdy 767 * 768 * 1-arg: Does not exist. 
769 * 770 * 2-arg: dudx dvdx dudy dvdy 771 * dPdx.x dPdx.y dPdy.x dPdy.y 772 * m4 m5 m6 m7 773 * 774 * 3-arg: dudx dvdx drdx dudy dvdy drdy 775 * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z 776 * m5 m6 m7 m8 m9 m10 777 */ 778 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { 779 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 780 dPdx.reg_offset++; 781 } 782 mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); 783 784 for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { 785 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 786 dPdy.reg_offset++; 787 } 788 mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); 789 } else if (ir->op == ir_txs) { 790 /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ 791 simd16 = true; 792 ir->lod_info.lod->accept(this); 793 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); 794 mlen += 2; 795 } else { 796 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 797 * instructions. We'll need to do SIMD16 here. 798 */ 799 simd16 = true; 800 assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf); 801 802 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 803 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), 804 coordinate); 805 coordinate.reg_offset++; 806 } 807 808 /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to 809 * be necessary for TXF (ld), but seems wise to do for all messages. 810 */ 811 for (int i = ir->coordinate->type->vector_elements; i < 3; i++) { 812 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)); 813 } 814 815 /* lod/bias appears after u/v/r. */ 816 mlen += 6; 817 818 if (ir->op == ir_txb) { 819 ir->lod_info.bias->accept(this); 820 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 821 mlen++; 822 } else { 823 ir->lod_info.lod->accept(this); 824 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, this->result.type), 825 this->result); 826 mlen++; 827 } 828 829 /* The unused upper half. */ 830 mlen++; 831 } 832 833 if (simd16) { 834 /* Now, since we're doing simd16, the return is 2 interleaved 835 * vec4s where the odd-indexed ones are junk. We'll need to move 836 * this weirdness around to the expected layout. 837 */ 838 orig_dst = dst; 839 const glsl_type *vec_type = 840 glsl_type::get_instance(ir->type->base_type, 4, 1); 841 dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2)); 842 dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type) 843 : BRW_REGISTER_TYPE_F; 844 } 845 846 fs_inst *inst = NULL; 847 switch (ir->op) { 848 case ir_tex: 849 inst = emit(SHADER_OPCODE_TEX, dst); 850 break; 851 case ir_txb: 852 inst = emit(FS_OPCODE_TXB, dst); 853 break; 854 case ir_txl: 855 inst = emit(SHADER_OPCODE_TXL, dst); 856 break; 857 case ir_txd: 858 inst = emit(SHADER_OPCODE_TXD, dst); 859 break; 860 case ir_txs: 861 inst = emit(SHADER_OPCODE_TXS, dst); 862 break; 863 case ir_txf: 864 inst = emit(SHADER_OPCODE_TXF, dst); 865 break; 866 } 867 inst->base_mrf = base_mrf; 868 inst->mlen = mlen; 869 inst->header_present = true; 870 871 if (simd16) { 872 for (int i = 0; i < 4; i++) { 873 emit(BRW_OPCODE_MOV, orig_dst, dst); 874 orig_dst.reg_offset++; 875 dst.reg_offset += 2; 876 } 877 } 878 879 return inst; 880} 881 882/* gen5's sampler has slots for u, v, r, array index, then optional 883 * parameters like shadow comparitor or LOD bias. 
If optional 884 * parameters aren't present, those base slots are optional and don't 885 * need to be included in the message. 886 * 887 * We don't fill in the unnecessary slots regardless, which may look 888 * surprising in the disassembly. 889 */ 890fs_inst * 891fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 892 int sampler) 893{ 894 int mlen = 0; 895 int base_mrf = 2; 896 int reg_width = c->dispatch_width / 8; 897 bool header_present = false; 898 const int vector_elements = 899 ir->coordinate ? ir->coordinate->type->vector_elements : 0; 900 901 if (ir->offset != NULL && ir->op == ir_txf) { 902 /* It appears that the ld instruction used for txf does its 903 * address bounds check before adding in the offset. To work 904 * around this, just add the integer offset to the integer texel 905 * coordinate, and don't put the offset in the header. 906 */ 907 ir_constant *offset = ir->offset->as_constant(); 908 for (int i = 0; i < vector_elements; i++) { 909 emit(BRW_OPCODE_ADD, 910 fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), 911 coordinate, 912 offset->value.i[i]); 913 coordinate.reg_offset++; 914 } 915 } else { 916 if (ir->offset) { 917 /* The offsets set up by the ir_texture visitor are in the 918 * m1 header, so we can't go headerless. 919 */ 920 header_present = true; 921 mlen++; 922 base_mrf--; 923 } 924 925 for (int i = 0; i < vector_elements; i++) { 926 emit(BRW_OPCODE_MOV, 927 fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), 928 coordinate); 929 coordinate.reg_offset++; 930 } 931 } 932 mlen += vector_elements * reg_width; 933 934 if (ir->shadow_comparitor) { 935 mlen = MAX2(mlen, header_present + 4 * reg_width); 936 937 ir->shadow_comparitor->accept(this); 938 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 939 mlen += reg_width; 940 } 941 942 fs_inst *inst = NULL; 943 switch (ir->op) { 944 case ir_tex: 945 inst = emit(SHADER_OPCODE_TEX, dst); 946 break; 947 case ir_txb: 948 ir->lod_info.bias->accept(this); 949 mlen = MAX2(mlen, header_present + 4 * reg_width); 950 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 951 mlen += reg_width; 952 953 inst = emit(FS_OPCODE_TXB, dst); 954 955 break; 956 case ir_txl: 957 ir->lod_info.lod->accept(this); 958 mlen = MAX2(mlen, header_present + 4 * reg_width); 959 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 960 mlen += reg_width; 961 962 inst = emit(SHADER_OPCODE_TXL, dst); 963 break; 964 case ir_txd: { 965 ir->lod_info.grad.dPdx->accept(this); 966 fs_reg dPdx = this->result; 967 968 ir->lod_info.grad.dPdy->accept(this); 969 fs_reg dPdy = this->result; 970 971 mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */ 972 973 /** 974 * P = u, v, r 975 * dPdx = dudx, dvdx, drdx 976 * dPdy = dudy, dvdy, drdy 977 * 978 * Load up these values: 979 * - dudx dudy dvdx dvdy drdx drdy 980 * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z 981 */ 982 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { 983 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 984 dPdx.reg_offset++; 985 mlen += reg_width; 986 987 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 988 dPdy.reg_offset++; 989 mlen += reg_width; 990 } 991 992 inst = emit(SHADER_OPCODE_TXD, dst); 993 break; 994 } 995 case ir_txs: 996 ir->lod_info.lod->accept(this); 997 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); 998 mlen += reg_width; 999 inst = emit(SHADER_OPCODE_TXS, dst); 1000 break; 1001 case 
ir_txf: 1002 mlen = header_present + 4 * reg_width; 1003 1004 ir->lod_info.lod->accept(this); 1005 emit(BRW_OPCODE_MOV, 1006 fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), 1007 this->result); 1008 inst = emit(SHADER_OPCODE_TXF, dst); 1009 break; 1010 } 1011 inst->base_mrf = base_mrf; 1012 inst->mlen = mlen; 1013 inst->header_present = header_present; 1014 1015 if (mlen > 11) { 1016 fail("Message length >11 disallowed by hardware\n"); 1017 } 1018 1019 return inst; 1020} 1021 1022fs_inst * 1023fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 1024 int sampler) 1025{ 1026 int mlen = 0; 1027 int base_mrf = 2; 1028 int reg_width = c->dispatch_width / 8; 1029 bool header_present = false; 1030 int offsets[3]; 1031 1032 if (ir->offset && ir->op != ir_txf) { 1033 /* The offsets set up by the ir_texture visitor are in the 1034 * m1 header, so we can't go headerless. 1035 */ 1036 header_present = true; 1037 mlen++; 1038 base_mrf--; 1039 } 1040 1041 if (ir->shadow_comparitor) { 1042 ir->shadow_comparitor->accept(this); 1043 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 1044 mlen += reg_width; 1045 } 1046 1047 /* Set up the LOD info */ 1048 switch (ir->op) { 1049 case ir_tex: 1050 break; 1051 case ir_txb: 1052 ir->lod_info.bias->accept(this); 1053 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 1054 mlen += reg_width; 1055 break; 1056 case ir_txl: 1057 ir->lod_info.lod->accept(this); 1058 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 1059 mlen += reg_width; 1060 break; 1061 case ir_txd: { 1062 if (c->dispatch_width == 16) 1063 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); 1064 1065 ir->lod_info.grad.dPdx->accept(this); 1066 fs_reg dPdx = this->result; 1067 1068 ir->lod_info.grad.dPdy->accept(this); 1069 fs_reg dPdy = this->result; 1070 1071 /* Load dPdx and the coordinate together: 1072 * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z 1073 */ 1074 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1075 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); 1076 coordinate.reg_offset++; 1077 mlen += reg_width; 1078 1079 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 1080 dPdx.reg_offset++; 1081 mlen += reg_width; 1082 1083 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 1084 dPdy.reg_offset++; 1085 mlen += reg_width; 1086 } 1087 break; 1088 } 1089 case ir_txs: 1090 ir->lod_info.lod->accept(this); 1091 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); 1092 mlen += reg_width; 1093 break; 1094 case ir_txf: 1095 /* It appears that the ld instruction used for txf does its 1096 * address bounds check before adding in the offset. To work 1097 * around this, just add the integer offset to the integer texel 1098 * coordinate, and don't put the offset in the header. 1099 */ 1100 if (ir->offset) { 1101 ir_constant *offset = ir->offset->as_constant(); 1102 offsets[0] = offset->value.i[0]; 1103 offsets[1] = offset->value.i[1]; 1104 offsets[2] = offset->value.i[2]; 1105 } else { 1106 memset(offsets, 0, sizeof(offsets)); 1107 } 1108 1109 /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. 
*/ 1110 emit(BRW_OPCODE_ADD, 1111 fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[0]); 1112 coordinate.reg_offset++; 1113 mlen += reg_width; 1114 1115 ir->lod_info.lod->accept(this); 1116 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), this->result); 1117 mlen += reg_width; 1118 1119 for (int i = 1; i < ir->coordinate->type->vector_elements; i++) { 1120 emit(BRW_OPCODE_ADD, 1121 fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[i]); 1122 coordinate.reg_offset++; 1123 mlen += reg_width; 1124 } 1125 break; 1126 } 1127 1128 /* Set up the coordinate (except for cases where it was done above) */ 1129 if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf) { 1130 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1131 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); 1132 coordinate.reg_offset++; 1133 mlen += reg_width; 1134 } 1135 } 1136 1137 /* Generate the SEND */ 1138 fs_inst *inst = NULL; 1139 switch (ir->op) { 1140 case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break; 1141 case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break; 1142 case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break; 1143 case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break; 1144 case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break; 1145 case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break; 1146 } 1147 inst->base_mrf = base_mrf; 1148 inst->mlen = mlen; 1149 inst->header_present = header_present; 1150 1151 if (mlen > 11) { 1152 fail("Message length >11 disallowed by hardware\n"); 1153 } 1154 1155 return inst; 1156} 1157 1158void 1159fs_visitor::visit(ir_texture *ir) 1160{ 1161 fs_inst *inst = NULL; 1162 1163 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &fp->Base); 1164 sampler = fp->Base.SamplerUnits[sampler]; 1165 1166 if (ir->coordinate) 1167 ir->coordinate->accept(this); 1168 fs_reg coordinate = this->result; 1169 1170 if (ir->offset != NULL && !(intel->gen == 7 && ir->op == ir_txf)) { 1171 uint32_t offset_bits = brw_texture_offset(ir->offset->as_constant()); 1172 1173 /* Explicitly set up the message header by copying g0 to msg reg m1. */ 1174 emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), 1175 fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD))); 1176 1177 /* Then set the offset bits in DWord 2 of the message header. */ 1178 emit(BRW_OPCODE_MOV, 1179 fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), 1180 BRW_REGISTER_TYPE_UD)), 1181 fs_reg(brw_imm_uw(offset_bits))); 1182 } 1183 1184 /* Should be lowered by do_lower_texture_projection */ 1185 assert(!ir->projector); 1186 1187 bool needs_gl_clamp = true; 1188 1189 fs_reg scale_x, scale_y; 1190 1191 /* The 965 requires the EU to do the normalization of GL rectangle 1192 * texture coordinates. We use the program parameter state 1193 * tracking to get the scaling factor. 
1194 */ 1195 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT && 1196 (intel->gen < 6 || 1197 (intel->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) || 1198 c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) { 1199 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1200 int tokens[STATE_LENGTH] = { 1201 STATE_INTERNAL, 1202 STATE_TEXRECT_SCALE, 1203 sampler, 1204 0, 1205 0 1206 }; 1207 1208 if (c->dispatch_width == 16) { 1209 fail("rectangle scale uniform setup not supported on 16-wide\n"); 1210 this->result = fs_reg(this, ir->type); 1211 return; 1212 } 1213 1214 c->prog_data.param_convert[c->prog_data.nr_params] = 1215 PARAM_NO_CONVERT; 1216 c->prog_data.param_convert[c->prog_data.nr_params + 1] = 1217 PARAM_NO_CONVERT; 1218 1219 scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1220 scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1221 1222 GLuint index = _mesa_add_state_reference(params, 1223 (gl_state_index *)tokens); 1224 1225 this->param_index[c->prog_data.nr_params] = index; 1226 this->param_offset[c->prog_data.nr_params] = 0; 1227 c->prog_data.nr_params++; 1228 this->param_index[c->prog_data.nr_params] = index; 1229 this->param_offset[c->prog_data.nr_params] = 1; 1230 c->prog_data.nr_params++; 1231 } 1232 1233 /* The 965 requires the EU to do the normalization of GL rectangle 1234 * texture coordinates. We use the program parameter state 1235 * tracking to get the scaling factor. 1236 */ 1237 if (intel->gen < 6 && 1238 ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1239 fs_reg dst = fs_reg(this, ir->coordinate->type); 1240 fs_reg src = coordinate; 1241 coordinate = dst; 1242 1243 emit(BRW_OPCODE_MUL, dst, src, scale_x); 1244 dst.reg_offset++; 1245 src.reg_offset++; 1246 emit(BRW_OPCODE_MUL, dst, src, scale_y); 1247 } else if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1248 /* On gen6+, the sampler handles the rectangle coordinates 1249 * natively, without needing rescaling. But that means we have 1250 * to do GL_CLAMP clamping at the [0, width], [0, height] scale, 1251 * not [0, 1] like the default case below. 1252 */ 1253 needs_gl_clamp = false; 1254 1255 for (int i = 0; i < 2; i++) { 1256 if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) { 1257 fs_reg chan = coordinate; 1258 chan.reg_offset += i; 1259 1260 inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0)); 1261 inst->conditional_mod = BRW_CONDITIONAL_G; 1262 1263 /* Our parameter comes in as 1.0/width or 1.0/height, 1264 * because that's what people normally want for doing 1265 * texture rectangle handling. We need width or height 1266 * for clamping, but we don't care enough to make a new 1267 * parameter type, so just invert back. 1268 */ 1269 fs_reg limit = fs_reg(this, glsl_type::float_type); 1270 emit(BRW_OPCODE_MOV, limit, i == 0 ? scale_x : scale_y); 1271 emit(SHADER_OPCODE_RCP, limit, limit); 1272 1273 inst = emit(BRW_OPCODE_SEL, chan, chan, limit); 1274 inst->conditional_mod = BRW_CONDITIONAL_L; 1275 } 1276 } 1277 } 1278 1279 if (ir->coordinate && needs_gl_clamp) { 1280 for (unsigned int i = 0; 1281 i < MIN2(ir->coordinate->type->vector_elements, 3); i++) { 1282 if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) { 1283 fs_reg chan = coordinate; 1284 chan.reg_offset += i; 1285 1286 fs_inst *inst = emit(BRW_OPCODE_MOV, chan, chan); 1287 inst->saturate = true; 1288 } 1289 } 1290 } 1291 1292 /* Writemasking doesn't eliminate channels on SIMD8 texture 1293 * samples, so don't worry about them. 
1294 */ 1295 fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1)); 1296 1297 if (intel->gen >= 7) { 1298 inst = emit_texture_gen7(ir, dst, coordinate, sampler); 1299 } else if (intel->gen >= 5) { 1300 inst = emit_texture_gen5(ir, dst, coordinate, sampler); 1301 } else { 1302 inst = emit_texture_gen4(ir, dst, coordinate, sampler); 1303 } 1304 1305 /* If there's an offset, we already set up m1. To avoid the implied move, 1306 * use the null register. Otherwise, we want an implied move from g0. 1307 */ 1308 if (ir->offset != NULL || !inst->header_present) 1309 inst->src[0] = reg_undef; 1310 else 1311 inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 1312 1313 inst->sampler = sampler; 1314 1315 if (ir->shadow_comparitor) 1316 inst->shadow_compare = true; 1317 1318 swizzle_result(ir, dst, sampler); 1319} 1320 1321/** 1322 * Swizzle the result of a texture result. This is necessary for 1323 * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. 1324 */ 1325void 1326fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) 1327{ 1328 this->result = orig_val; 1329 1330 if (ir->op == ir_txs) 1331 return; 1332 1333 if (ir->type == glsl_type::float_type) { 1334 /* Ignore DEPTH_TEXTURE_MODE swizzling. */ 1335 assert(ir->sampler->type->sampler_shadow); 1336 } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) { 1337 fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type); 1338 1339 for (int i = 0; i < 4; i++) { 1340 int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i); 1341 fs_reg l = swizzled_result; 1342 l.reg_offset += i; 1343 1344 if (swiz == SWIZZLE_ZERO) { 1345 emit(BRW_OPCODE_MOV, l, fs_reg(0.0f)); 1346 } else if (swiz == SWIZZLE_ONE) { 1347 emit(BRW_OPCODE_MOV, l, fs_reg(1.0f)); 1348 } else { 1349 fs_reg r = orig_val; 1350 r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i); 1351 emit(BRW_OPCODE_MOV, l, r); 1352 } 1353 } 1354 this->result = swizzled_result; 1355 } 1356} 1357 1358void 1359fs_visitor::visit(ir_swizzle *ir) 1360{ 1361 ir->val->accept(this); 1362 fs_reg val = this->result; 1363 1364 if (ir->type->vector_elements == 1) { 1365 this->result.reg_offset += ir->mask.x; 1366 return; 1367 } 1368 1369 fs_reg result = fs_reg(this, ir->type); 1370 this->result = result; 1371 1372 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1373 fs_reg channel = val; 1374 int swiz = 0; 1375 1376 switch (i) { 1377 case 0: 1378 swiz = ir->mask.x; 1379 break; 1380 case 1: 1381 swiz = ir->mask.y; 1382 break; 1383 case 2: 1384 swiz = ir->mask.z; 1385 break; 1386 case 3: 1387 swiz = ir->mask.w; 1388 break; 1389 } 1390 1391 channel.reg_offset += swiz; 1392 emit(BRW_OPCODE_MOV, result, channel); 1393 result.reg_offset++; 1394 } 1395} 1396 1397void 1398fs_visitor::visit(ir_discard *ir) 1399{ 1400 assert(ir->condition == NULL); /* FINISHME */ 1401 1402 emit(FS_OPCODE_DISCARD); 1403} 1404 1405void 1406fs_visitor::visit(ir_constant *ir) 1407{ 1408 /* Set this->result to reg at the bottom of the function because some code 1409 * paths will cause this visitor to be applied to other fields. This will 1410 * cause the value stored in this->result to be modified. 1411 * 1412 * Make reg constant so that it doesn't get accidentally modified along the 1413 * way. Yes, I actually had this problem. 
:( 1414 */ 1415 const fs_reg reg(this, ir->type); 1416 fs_reg dst_reg = reg; 1417 1418 if (ir->type->is_array()) { 1419 const unsigned size = type_size(ir->type->fields.array); 1420 1421 for (unsigned i = 0; i < ir->type->length; i++) { 1422 ir->array_elements[i]->accept(this); 1423 fs_reg src_reg = this->result; 1424 1425 dst_reg.type = src_reg.type; 1426 for (unsigned j = 0; j < size; j++) { 1427 emit(BRW_OPCODE_MOV, dst_reg, src_reg); 1428 src_reg.reg_offset++; 1429 dst_reg.reg_offset++; 1430 } 1431 } 1432 } else if (ir->type->is_record()) { 1433 foreach_list(node, &ir->components) { 1434 ir_constant *const field = (ir_constant *) node; 1435 const unsigned size = type_size(field->type); 1436 1437 field->accept(this); 1438 fs_reg src_reg = this->result; 1439 1440 dst_reg.type = src_reg.type; 1441 for (unsigned j = 0; j < size; j++) { 1442 emit(BRW_OPCODE_MOV, dst_reg, src_reg); 1443 src_reg.reg_offset++; 1444 dst_reg.reg_offset++; 1445 } 1446 } 1447 } else { 1448 const unsigned size = type_size(ir->type); 1449 1450 for (unsigned i = 0; i < size; i++) { 1451 switch (ir->type->base_type) { 1452 case GLSL_TYPE_FLOAT: 1453 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])); 1454 break; 1455 case GLSL_TYPE_UINT: 1456 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])); 1457 break; 1458 case GLSL_TYPE_INT: 1459 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])); 1460 break; 1461 case GLSL_TYPE_BOOL: 1462 emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])); 1463 break; 1464 default: 1465 assert(!"Non-float/uint/int/bool constant"); 1466 } 1467 dst_reg.reg_offset++; 1468 } 1469 } 1470 1471 this->result = reg; 1472} 1473 1474void 1475fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 1476{ 1477 ir_expression *expr = ir->as_expression(); 1478 1479 if (expr) { 1480 fs_reg op[2]; 1481 fs_inst *inst; 1482 1483 assert(expr->get_num_operands() <= 2); 1484 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1485 assert(expr->operands[i]->type->is_scalar()); 1486 1487 expr->operands[i]->accept(this); 1488 op[i] = this->result; 1489 1490 resolve_ud_negate(&op[i]); 1491 } 1492 1493 switch (expr->operation) { 1494 case ir_unop_logic_not: 1495 inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)); 1496 inst->conditional_mod = BRW_CONDITIONAL_Z; 1497 break; 1498 1499 case ir_binop_logic_xor: 1500 case ir_binop_logic_or: 1501 case ir_binop_logic_and: 1502 goto out; 1503 1504 case ir_unop_f2b: 1505 if (intel->gen >= 6) { 1506 inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f)); 1507 } else { 1508 inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]); 1509 } 1510 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1511 break; 1512 1513 case ir_unop_i2b: 1514 if (intel->gen >= 6) { 1515 inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)); 1516 } else { 1517 inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]); 1518 } 1519 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1520 break; 1521 1522 case ir_binop_greater: 1523 case ir_binop_gequal: 1524 case ir_binop_less: 1525 case ir_binop_lequal: 1526 case ir_binop_equal: 1527 case ir_binop_all_equal: 1528 case ir_binop_nequal: 1529 case ir_binop_any_nequal: 1530 resolve_bool_comparison(expr->operands[0], &op[0]); 1531 resolve_bool_comparison(expr->operands[1], &op[1]); 1532 1533 inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]); 1534 inst->conditional_mod = 1535 brw_conditional_for_comparison(expr->operation); 1536 break; 1537 1538 default: 1539 assert(!"not reached"); 1540 fail("bad cond code\n"); 1541 break; 1542 } 1543 return; 1544 } 1545 
1546out: 1547 ir->accept(this); 1548 1549 fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1)); 1550 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1551} 1552 1553/** 1554 * Emit a gen6 IF statement with the comparison folded into the IF 1555 * instruction. 1556 */ 1557void 1558fs_visitor::emit_if_gen6(ir_if *ir) 1559{ 1560 ir_expression *expr = ir->condition->as_expression(); 1561 1562 if (expr) { 1563 fs_reg op[2]; 1564 fs_inst *inst; 1565 fs_reg temp; 1566 1567 assert(expr->get_num_operands() <= 2); 1568 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1569 assert(expr->operands[i]->type->is_scalar()); 1570 1571 expr->operands[i]->accept(this); 1572 op[i] = this->result; 1573 } 1574 1575 switch (expr->operation) { 1576 case ir_unop_logic_not: 1577 inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0)); 1578 inst->conditional_mod = BRW_CONDITIONAL_Z; 1579 return; 1580 1581 case ir_binop_logic_xor: 1582 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); 1583 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1584 return; 1585 1586 case ir_binop_logic_or: 1587 temp = fs_reg(this, glsl_type::bool_type); 1588 emit(BRW_OPCODE_OR, temp, op[0], op[1]); 1589 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); 1590 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1591 return; 1592 1593 case ir_binop_logic_and: 1594 temp = fs_reg(this, glsl_type::bool_type); 1595 emit(BRW_OPCODE_AND, temp, op[0], op[1]); 1596 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); 1597 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1598 return; 1599 1600 case ir_unop_f2b: 1601 inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); 1602 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1603 return; 1604 1605 case ir_unop_i2b: 1606 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); 1607 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1608 return; 1609 1610 case ir_binop_greater: 1611 case ir_binop_gequal: 1612 case ir_binop_less: 1613 case ir_binop_lequal: 1614 case ir_binop_equal: 1615 case ir_binop_all_equal: 1616 case ir_binop_nequal: 1617 case ir_binop_any_nequal: 1618 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); 1619 inst->conditional_mod = 1620 brw_conditional_for_comparison(expr->operation); 1621 return; 1622 default: 1623 assert(!"not reached"); 1624 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); 1625 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1626 fail("bad condition\n"); 1627 return; 1628 } 1629 return; 1630 } 1631 1632 ir->condition->accept(this); 1633 1634 fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)); 1635 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1636} 1637 1638void 1639fs_visitor::visit(ir_if *ir) 1640{ 1641 fs_inst *inst; 1642 1643 if (intel->gen < 6 && c->dispatch_width == 16) { 1644 fail("Can't support (non-uniform) control flow on 16-wide\n"); 1645 } 1646 1647 /* Don't point the annotation at the if statement, because then it plus 1648 * the then and else blocks get printed. 
1649 */ 1650 this->base_ir = ir->condition; 1651 1652 if (intel->gen == 6) { 1653 emit_if_gen6(ir); 1654 } else { 1655 emit_bool_to_cond_code(ir->condition); 1656 1657 inst = emit(BRW_OPCODE_IF); 1658 inst->predicated = true; 1659 } 1660 1661 foreach_list(node, &ir->then_instructions) { 1662 ir_instruction *ir = (ir_instruction *)node; 1663 this->base_ir = ir; 1664 1665 ir->accept(this); 1666 } 1667 1668 if (!ir->else_instructions.is_empty()) { 1669 emit(BRW_OPCODE_ELSE); 1670 1671 foreach_list(node, &ir->else_instructions) { 1672 ir_instruction *ir = (ir_instruction *)node; 1673 this->base_ir = ir; 1674 1675 ir->accept(this); 1676 } 1677 } 1678 1679 emit(BRW_OPCODE_ENDIF); 1680} 1681 1682void 1683fs_visitor::visit(ir_loop *ir) 1684{ 1685 fs_reg counter = reg_undef; 1686 1687 if (intel->gen < 6 && c->dispatch_width == 16) { 1688 fail("Can't support (non-uniform) control flow on 16-wide\n"); 1689 } 1690 1691 if (ir->counter) { 1692 this->base_ir = ir->counter; 1693 ir->counter->accept(this); 1694 counter = *(variable_storage(ir->counter)); 1695 1696 if (ir->from) { 1697 this->base_ir = ir->from; 1698 ir->from->accept(this); 1699 1700 emit(BRW_OPCODE_MOV, counter, this->result); 1701 } 1702 } 1703 1704 this->base_ir = NULL; 1705 emit(BRW_OPCODE_DO); 1706 1707 if (ir->to) { 1708 this->base_ir = ir->to; 1709 ir->to->accept(this); 1710 1711 fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result); 1712 inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); 1713 1714 inst = emit(BRW_OPCODE_BREAK); 1715 inst->predicated = true; 1716 } 1717 1718 foreach_list(node, &ir->body_instructions) { 1719 ir_instruction *ir = (ir_instruction *)node; 1720 1721 this->base_ir = ir; 1722 ir->accept(this); 1723 } 1724 1725 if (ir->increment) { 1726 this->base_ir = ir->increment; 1727 ir->increment->accept(this); 1728 emit(BRW_OPCODE_ADD, counter, counter, this->result); 1729 } 1730 1731 this->base_ir = NULL; 1732 emit(BRW_OPCODE_WHILE); 1733} 1734 1735void 1736fs_visitor::visit(ir_loop_jump *ir) 1737{ 1738 switch (ir->mode) { 1739 case ir_loop_jump::jump_break: 1740 emit(BRW_OPCODE_BREAK); 1741 break; 1742 case ir_loop_jump::jump_continue: 1743 emit(BRW_OPCODE_CONTINUE); 1744 break; 1745 } 1746} 1747 1748void 1749fs_visitor::visit(ir_call *ir) 1750{ 1751 assert(!"FINISHME"); 1752} 1753 1754void 1755fs_visitor::visit(ir_return *ir) 1756{ 1757 assert(!"FINISHME"); 1758} 1759 1760void 1761fs_visitor::visit(ir_function *ir) 1762{ 1763 /* Ignore function bodies other than main() -- we shouldn't see calls to 1764 * them since they should all be inlined before we get to ir_to_mesa. 
1765 */ 1766 if (strcmp(ir->name, "main") == 0) { 1767 const ir_function_signature *sig; 1768 exec_list empty; 1769 1770 sig = ir->matching_signature(&empty); 1771 1772 assert(sig); 1773 1774 foreach_list(node, &sig->body) { 1775 ir_instruction *ir = (ir_instruction *)node; 1776 this->base_ir = ir; 1777 1778 ir->accept(this); 1779 } 1780 } 1781} 1782 1783void 1784fs_visitor::visit(ir_function_signature *ir) 1785{ 1786 assert(!"not reached"); 1787 (void)ir; 1788} 1789 1790fs_inst * 1791fs_visitor::emit(fs_inst inst) 1792{ 1793 fs_inst *list_inst = new(mem_ctx) fs_inst; 1794 *list_inst = inst; 1795 1796 if (force_uncompressed_stack > 0) 1797 list_inst->force_uncompressed = true; 1798 else if (force_sechalf_stack > 0) 1799 list_inst->force_sechalf = true; 1800 1801 list_inst->annotation = this->current_annotation; 1802 list_inst->ir = this->base_ir; 1803 1804 this->instructions.push_tail(list_inst); 1805 1806 return list_inst; 1807} 1808 1809/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1810void 1811fs_visitor::emit_dummy_fs() 1812{ 1813 int reg_width = c->dispatch_width / 8; 1814 1815 /* Everyone's favorite color. */ 1816 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)); 1817 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f)); 1818 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f)); 1819 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f)); 1820 1821 fs_inst *write; 1822 write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); 1823 write->base_mrf = 2; 1824 write->mlen = 4 * reg_width; 1825 write->eot = true; 1826} 1827 1828/* The register location here is relative to the start of the URB 1829 * data. It will get adjusted to be a real location before 1830 * generate_code() time. 1831 */ 1832struct brw_reg 1833fs_visitor::interp_reg(int location, int channel) 1834{ 1835 int regnr = urb_setup[location] * 2 + channel / 2; 1836 int stride = (channel & 1) * 4; 1837 1838 assert(urb_setup[location] != -1); 1839 1840 return brw_vec1_grf(regnr, stride); 1841} 1842 1843/** Emits the interpolation for the varying inputs. */ 1844void 1845fs_visitor::emit_interpolation_setup_gen4() 1846{ 1847 this->current_annotation = "compute pixel centers"; 1848 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1849 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1850 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1851 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1852 1853 emit(FS_OPCODE_PIXEL_X, this->pixel_x); 1854 emit(FS_OPCODE_PIXEL_Y, this->pixel_y); 1855 1856 this->current_annotation = "compute pixel deltas from v0"; 1857 if (brw->has_pln) { 1858 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1859 fs_reg(this, glsl_type::vec2_type); 1860 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1861 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; 1862 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++; 1863 } else { 1864 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1865 fs_reg(this, glsl_type::float_type); 1866 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1867 fs_reg(this, glsl_type::float_type); 1868 } 1869 emit(BRW_OPCODE_ADD, this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1870 this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))); 1871 emit(BRW_OPCODE_ADD, this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1872 this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); 1873 1874 this->current_annotation = "compute pos.w and 1/pos.w"; 1875 /* Compute wpos.w. 
It's always in our setup, since it's needed to 1876 * interpolate the other attributes. 1877 */ 1878 this->wpos_w = fs_reg(this, glsl_type::float_type); 1879 emit(FS_OPCODE_LINTERP, wpos_w, 1880 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1881 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1882 interp_reg(FRAG_ATTRIB_WPOS, 3)); 1883 /* Compute the pixel 1/W value from wpos.w. */ 1884 this->pixel_w = fs_reg(this, glsl_type::float_type); 1885 emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); 1886 this->current_annotation = NULL; 1887} 1888 1889/** Emits the interpolation for the varying inputs. */ 1890void 1891fs_visitor::emit_interpolation_setup_gen6() 1892{ 1893 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1894 1895 /* If the pixel centers end up used, the setup is the same as for gen4. */ 1896 this->current_annotation = "compute pixel centers"; 1897 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); 1898 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); 1899 int_pixel_x.type = BRW_REGISTER_TYPE_UW; 1900 int_pixel_y.type = BRW_REGISTER_TYPE_UW; 1901 emit(BRW_OPCODE_ADD, 1902 int_pixel_x, 1903 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1904 fs_reg(brw_imm_v(0x10101010))); 1905 emit(BRW_OPCODE_ADD, 1906 int_pixel_y, 1907 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1908 fs_reg(brw_imm_v(0x11001100))); 1909 1910 /* As of gen6, we can no longer mix float and int sources. We have 1911 * to turn the integer pixel centers into floats for their actual 1912 * use. 1913 */ 1914 this->pixel_x = fs_reg(this, glsl_type::float_type); 1915 this->pixel_y = fs_reg(this, glsl_type::float_type); 1916 emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x); 1917 emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y); 1918 1919 this->current_annotation = "compute pos.w"; 1920 this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); 1921 this->wpos_w = fs_reg(this, glsl_type::float_type); 1922 emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); 1923 1924 for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) { 1925 uint8_t reg = c->barycentric_coord_reg[i]; 1926 this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0)); 1927 this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0)); 1928 } 1929 1930 this->current_annotation = NULL; 1931} 1932 1933void 1934fs_visitor::emit_color_write(int target, int index, int first_color_mrf) 1935{ 1936 int reg_width = c->dispatch_width / 8; 1937 fs_inst *inst; 1938 fs_reg color = outputs[target]; 1939 fs_reg mrf; 1940 1941 /* If there's no color data to be written, skip it. 

void
fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
{
   int reg_width = c->dispatch_width / 8;
   fs_inst *inst;
   fs_reg color = outputs[target];
   fs_reg mrf;

   /* If there's no color data to be written, skip it. */
   if (color.file == BAD_FILE)
      return;

   color.reg_offset += index;

   if (c->dispatch_width == 8 || intel->gen >= 6) {
      /* SIMD8 write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       *
       * gen6 SIMD16 DP write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       * m + 4: b0
       * m + 5: b1
       * m + 6: a0
       * m + 7: a1
       */
      inst = emit(BRW_OPCODE_MOV,
                  fs_reg(MRF, first_color_mrf + index * reg_width, color.type),
                  color);
      inst->saturate = c->key.clamp_fragment_color;
   } else {
      /* pre-gen6 SIMD16 single source DP write looks like:
       * m + 0: r0
       * m + 1: g0
       * m + 2: b0
       * m + 3: a0
       * m + 4: r1
       * m + 5: g1
       * m + 6: b1
       * m + 7: a1
       */
      if (brw->has_compr4) {
         /* By setting the high bit of the MRF register number, we
          * indicate that we want COMPR4 mode - instead of doing the
          * usual destination + 1 for the second half we get
          * destination + 4.
          */
         inst = emit(BRW_OPCODE_MOV,
                     fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
                            color.type),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
      } else {
         push_force_uncompressed();
         inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index,
                                            color.type),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_uncompressed();

         push_force_sechalf();
         color.sechalf = true;
         inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4,
                                            color.type),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_sechalf();
         color.sechalf = false;
      }
   }
}

void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   bool header_present = true;
   /* We can potentially have a message length of up to 15, so we have to set
    * base_mrf to either 0 or 1 in order to fit in m0..m15.
    */
   int base_mrf = 1;
   int nr = base_mrf;
   int reg_width = c->dispatch_width / 8;
   bool do_dual_src = this->dual_src_output.file != BAD_FILE;

   if (c->dispatch_width == 16 && do_dual_src) {
      fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
      do_dual_src = false;
   }

   /* From the Sandy Bridge PRM, volume 4, page 198:
    *
    *     "Dispatched Pixel Enables. One bit per pixel indicating
    *      which pixels were originally enabled when the thread was
    *      dispatched. This field is only required for the end-of-
    *      thread message and on all dual-source messages."
    */
   if (intel->gen >= 6 &&
       !this->fp->UsesKill &&
       !do_dual_src &&
       c->key.nr_color_regions == 1) {
      header_present = false;
   }

   if (header_present) {
      /* m1, m2 header */
      nr += 2;
   }

   if (c->aa_dest_stencil_reg) {
      push_force_uncompressed();
      emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
           fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)));
      pop_force_uncompressed();
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4 * reg_width;
   if (do_dual_src)
      nr += 4;

   if (c->source_depth_to_render_target) {
      if (intel->gen == 6 && c->dispatch_width == 16) {
         /* For outputting oDepth on gen6, SIMD8 writes have to be
          * used.  This would require 8-wide moves of each half to
          * message regs, kind of like pre-gen5 SIMD16 FB writes.
          * Just bail on doing so for now.
          */
         fail("Missing support for simd16 depth writes on gen6\n");
      }

      if (c->computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth);
      } else {
         /* Pass through the payload depth. */
         emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
              fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
      }
      nr += reg_width;
   }

   if (c->dest_depth_reg) {
      emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
           fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)));
      nr += reg_width;
   }

   if (do_dual_src) {
      fs_reg src0 = this->outputs[0];
      fs_reg src1 = this->dual_src_output;

      this->current_annotation = ralloc_asprintf(this->mem_ctx,
                                                 "FB write src0");
      for (int i = 0; i < 4; i++) {
         fs_inst *inst = emit(BRW_OPCODE_MOV,
                              fs_reg(MRF, color_mrf + i, src0.type),
                              src0);
         src0.reg_offset++;
         inst->saturate = c->key.clamp_fragment_color;
      }

      this->current_annotation = ralloc_asprintf(this->mem_ctx,
                                                 "FB write src1");
      for (int i = 0; i < 4; i++) {
         fs_inst *inst = emit(BRW_OPCODE_MOV,
                              fs_reg(MRF, color_mrf + 4 + i, src1.type),
                              src1);
         src1.reg_offset++;
         inst->saturate = c->key.clamp_fragment_color;
      }

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->target = 0;
      inst->base_mrf = base_mrf;
      inst->mlen = nr - base_mrf;
      inst->eot = true;
      inst->header_present = header_present;

      c->prog_data.dual_src_blend = true;
      this->current_annotation = NULL;
      return;
   }

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = ralloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      for (unsigned i = 0; i < this->output_components[target]; i++)
         emit_color_write(target, i, color_mrf);

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->target = target;
      inst->base_mrf = base_mrf;
      inst->mlen = nr - base_mrf;
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
      inst->header_present = header_present;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even if there are no color buffers enabled, we still need to send
       * alpha out the pipeline to our null renderbuffer to support
       * alpha-testing, alpha-to-coverage, and so on.
       */
      emit_color_write(0, 3, color_mrf);

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = nr - base_mrf;
      inst->eot = true;
      inst->header_present = header_present;
   }

   this->current_annotation = NULL;
}
2146 */ 2147 emit_color_write(0, 3, color_mrf); 2148 2149 fs_inst *inst = emit(FS_OPCODE_FB_WRITE); 2150 inst->base_mrf = base_mrf; 2151 inst->mlen = nr - base_mrf; 2152 inst->eot = true; 2153 inst->header_present = header_present; 2154 } 2155 2156 this->current_annotation = NULL; 2157} 2158 2159void 2160fs_visitor::resolve_ud_negate(fs_reg *reg) 2161{ 2162 if (reg->type != BRW_REGISTER_TYPE_UD || 2163 !reg->negate) 2164 return; 2165 2166 fs_reg temp = fs_reg(this, glsl_type::uint_type); 2167 emit(BRW_OPCODE_MOV, temp, *reg); 2168 *reg = temp; 2169} 2170 2171void 2172fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) 2173{ 2174 if (rvalue->type != glsl_type::bool_type) 2175 return; 2176 2177 fs_reg temp = fs_reg(this, glsl_type::bool_type); 2178 emit(BRW_OPCODE_AND, temp, *reg, fs_reg(1)); 2179 *reg = temp; 2180} 2181 2182fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, 2183 struct brw_shader *shader) 2184{ 2185 this->c = c; 2186 this->p = &c->func; 2187 this->brw = p->brw; 2188 this->fp = (struct gl_fragment_program *) 2189 prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; 2190 this->prog = prog; 2191 this->intel = &brw->intel; 2192 this->ctx = &intel->ctx; 2193 this->mem_ctx = ralloc_context(NULL); 2194 this->shader = shader; 2195 this->failed = false; 2196 this->variable_ht = hash_table_ctor(0, 2197 hash_table_pointer_hash, 2198 hash_table_pointer_compare); 2199 2200 /* There's a question that appears to be left open in the spec: 2201 * How do implicit dst conversions interact with the CMP 2202 * instruction or conditional mods? On gen6, the instruction: 2203 * 2204 * CMP null<d> src0<f> src1<f> 2205 * 2206 * will do src1 - src0 and compare that result as if it was an 2207 * integer. On gen4, it will do src1 - src0 as float, convert 2208 * the result to int, and compare as int. In between, it 2209 * appears that it does src1 - src0 and does the compare in the 2210 * execution type so dst type doesn't matter. 2211 */ 2212 if (this->intel->gen > 4) 2213 this->reg_null_cmp = reg_null_d; 2214 else 2215 this->reg_null_cmp = reg_null_f; 2216 2217 this->frag_depth = NULL; 2218 memset(this->outputs, 0, sizeof(this->outputs)); 2219 this->first_non_payload_grf = 0; 2220 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; 2221 2222 this->current_annotation = NULL; 2223 this->base_ir = NULL; 2224 2225 this->virtual_grf_sizes = NULL; 2226 this->virtual_grf_count = 0; 2227 this->virtual_grf_array_size = 0; 2228 this->virtual_grf_def = NULL; 2229 this->virtual_grf_use = NULL; 2230 this->live_intervals_valid = false; 2231 2232 this->force_uncompressed_stack = 0; 2233 this->force_sechalf_stack = 0; 2234} 2235 2236fs_visitor::~fs_visitor() 2237{ 2238 ralloc_free(this->mem_ctx); 2239 hash_table_dtor(this->variable_ht); 2240} 2241