brw_fs_visitor.cpp revision 2ea3ab14f2182978f471674c9dfce029d37f70a7
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file brw_fs_visitor.cpp 25 * 26 * This file supports generating the FS LIR from the GLSL IR. The LIR 27 * makes it easier to do backend-specific optimizations than doing so 28 * in the GLSL IR or in the native code. 
29 */ 30extern "C" { 31 32#include <sys/types.h> 33 34#include "main/macros.h" 35#include "main/shaderobj.h" 36#include "main/uniforms.h" 37#include "program/prog_parameter.h" 38#include "program/prog_print.h" 39#include "program/prog_optimize.h" 40#include "program/register_allocate.h" 41#include "program/sampler.h" 42#include "program/hash_table.h" 43#include "brw_context.h" 44#include "brw_eu.h" 45#include "brw_wm.h" 46} 47#include "brw_shader.h" 48#include "brw_fs.h" 49#include "glsl/glsl_types.h" 50#include "glsl/ir_optimization.h" 51#include "glsl/ir_print_visitor.h" 52 53void 54fs_visitor::visit(ir_variable *ir) 55{ 56 fs_reg *reg = NULL; 57 58 if (variable_storage(ir)) 59 return; 60 61 if (ir->mode == ir_var_in) { 62 if (!strcmp(ir->name, "gl_FragCoord")) { 63 reg = emit_fragcoord_interpolation(ir); 64 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 65 reg = emit_frontfacing_interpolation(ir); 66 } else { 67 reg = emit_general_interpolation(ir); 68 } 69 assert(reg); 70 hash_table_insert(this->variable_ht, reg, ir); 71 return; 72 } else if (ir->mode == ir_var_out) { 73 reg = new(this->mem_ctx) fs_reg(this, ir->type); 74 75 if (ir->index > 0) { 76 assert(ir->location == FRAG_RESULT_DATA0); 77 assert(ir->index == 1); 78 this->dual_src_output = *reg; 79 } else if (ir->location == FRAG_RESULT_COLOR) { 80 /* Writing gl_FragColor outputs to all color regions. */ 81 for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { 82 this->outputs[i] = *reg; 83 this->output_components[i] = 4; 84 } 85 } else if (ir->location == FRAG_RESULT_DEPTH) { 86 this->frag_depth = ir; 87 } else { 88 /* gl_FragData or a user-defined FS output */ 89 assert(ir->location >= FRAG_RESULT_DATA0 && 90 ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); 91 92 int vector_elements = 93 ir->type->is_array() ? ir->type->fields.array->vector_elements 94 : ir->type->vector_elements; 95 96 /* General color output. 
*/ 97 for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) { 98 int output = ir->location - FRAG_RESULT_DATA0 + i; 99 this->outputs[output] = *reg; 100 this->outputs[output].reg_offset += vector_elements * i; 101 this->output_components[output] = vector_elements; 102 } 103 } 104 } else if (ir->mode == ir_var_uniform) { 105 int param_index = c->prog_data.nr_params; 106 107 if (c->dispatch_width == 16) { 108 if (!variable_storage(ir)) { 109 fail("Failed to find uniform '%s' in 16-wide\n", ir->name); 110 } 111 return; 112 } 113 114 if (!strncmp(ir->name, "gl_", 3)) { 115 setup_builtin_uniform_values(ir); 116 } else { 117 setup_uniform_values(ir->location, ir->type); 118 } 119 120 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 121 reg->type = brw_type_for_base_type(ir->type); 122 } 123 124 if (!reg) 125 reg = new(this->mem_ctx) fs_reg(this, ir->type); 126 127 hash_table_insert(this->variable_ht, reg, ir); 128} 129 130void 131fs_visitor::visit(ir_dereference_variable *ir) 132{ 133 fs_reg *reg = variable_storage(ir->var); 134 this->result = *reg; 135} 136 137void 138fs_visitor::visit(ir_dereference_record *ir) 139{ 140 const glsl_type *struct_type = ir->record->type; 141 142 ir->record->accept(this); 143 144 unsigned int offset = 0; 145 for (unsigned int i = 0; i < struct_type->length; i++) { 146 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 147 break; 148 offset += type_size(struct_type->fields.structure[i].type); 149 } 150 this->result.reg_offset += offset; 151 this->result.type = brw_type_for_base_type(ir->type); 152} 153 154void 155fs_visitor::visit(ir_dereference_array *ir) 156{ 157 ir_constant *index; 158 int element_size; 159 160 ir->array->accept(this); 161 index = ir->array_index->as_constant(); 162 163 element_size = type_size(ir->type); 164 this->result.type = brw_type_for_base_type(ir->type); 165 166 if (index) { 167 assert(this->result.file == UNIFORM || this->result.file == GRF); 168 this->result.reg_offset += 
index->value.i[0] * element_size; 169 } else { 170 assert(!"FINISHME: non-constant array element"); 171 } 172} 173 174/* Instruction selection: Produce a MOV.sat instead of 175 * MIN(MAX(val, 0), 1) when possible. 176 */ 177bool 178fs_visitor::try_emit_saturate(ir_expression *ir) 179{ 180 ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); 181 182 if (!sat_val) 183 return false; 184 185 fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail(); 186 187 sat_val->accept(this); 188 fs_reg src = this->result; 189 190 fs_inst *last_inst = (fs_inst *) this->instructions.get_tail(); 191 192 /* If the last instruction from our accept() didn't generate our 193 * src, generate a saturated MOV 194 */ 195 fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); 196 if (!modify || modify->regs_written() != 1) { 197 fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src); 198 inst->saturate = true; 199 } else { 200 modify->saturate = true; 201 this->result = src; 202 } 203 204 205 return true; 206} 207 208bool 209fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) 210{ 211 /* 3-src instructions were introduced in gen6. */ 212 if (intel->gen < 6) 213 return false; 214 215 /* MAD can only handle floating-point data. 
*/ 216 if (ir->type != glsl_type::float_type) 217 return false; 218 219 ir_rvalue *nonmul = ir->operands[1 - mul_arg]; 220 ir_expression *mul = ir->operands[mul_arg]->as_expression(); 221 222 if (!mul || mul->operation != ir_binop_mul) 223 return false; 224 225 if (nonmul->as_constant() || 226 mul->operands[0]->as_constant() || 227 mul->operands[1]->as_constant()) 228 return false; 229 230 nonmul->accept(this); 231 fs_reg src0 = this->result; 232 233 mul->operands[0]->accept(this); 234 fs_reg src1 = this->result; 235 236 mul->operands[1]->accept(this); 237 fs_reg src2 = this->result; 238 239 this->result = fs_reg(this, ir->type); 240 emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); 241 242 return true; 243} 244 245void 246fs_visitor::visit(ir_expression *ir) 247{ 248 unsigned int operand; 249 fs_reg op[2], temp; 250 fs_inst *inst; 251 252 assert(ir->get_num_operands() <= 2); 253 254 if (try_emit_saturate(ir)) 255 return; 256 if (ir->operation == ir_binop_add) { 257 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1)) 258 return; 259 } 260 261 for (operand = 0; operand < ir->get_num_operands(); operand++) { 262 ir->operands[operand]->accept(this); 263 if (this->result.file == BAD_FILE) { 264 ir_print_visitor v; 265 fail("Failed to get tree for expression operand:\n"); 266 ir->operands[operand]->accept(&v); 267 } 268 op[operand] = this->result; 269 270 /* Matrix expression operands should have been broken down to vector 271 * operations already. 272 */ 273 assert(!ir->operands[operand]->type->is_matrix()); 274 /* And then those vector operands should have been broken down to scalar. 275 */ 276 assert(!ir->operands[operand]->type->is_vector()); 277 } 278 279 /* Storage for our result. If our result goes into an assignment, it will 280 * just get copy-propagated out, so no worries. 
281 */ 282 this->result = fs_reg(this, ir->type); 283 284 switch (ir->operation) { 285 case ir_unop_logic_not: 286 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 287 * ones complement of the whole register, not just bit 0. 288 */ 289 emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)); 290 break; 291 case ir_unop_neg: 292 op[0].negate = !op[0].negate; 293 this->result = op[0]; 294 break; 295 case ir_unop_abs: 296 op[0].abs = true; 297 op[0].negate = false; 298 this->result = op[0]; 299 break; 300 case ir_unop_sign: 301 temp = fs_reg(this, ir->type); 302 303 emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)); 304 305 inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); 306 inst->conditional_mod = BRW_CONDITIONAL_G; 307 inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)); 308 inst->predicated = true; 309 310 inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); 311 inst->conditional_mod = BRW_CONDITIONAL_L; 312 inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)); 313 inst->predicated = true; 314 315 break; 316 case ir_unop_rcp: 317 emit_math(SHADER_OPCODE_RCP, this->result, op[0]); 318 break; 319 320 case ir_unop_exp2: 321 emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); 322 break; 323 case ir_unop_log2: 324 emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); 325 break; 326 case ir_unop_exp: 327 case ir_unop_log: 328 assert(!"not reached: should be handled by ir_explog_to_explog2"); 329 break; 330 case ir_unop_sin: 331 case ir_unop_sin_reduced: 332 emit_math(SHADER_OPCODE_SIN, this->result, op[0]); 333 break; 334 case ir_unop_cos: 335 case ir_unop_cos_reduced: 336 emit_math(SHADER_OPCODE_COS, this->result, op[0]); 337 break; 338 339 case ir_unop_dFdx: 340 emit(FS_OPCODE_DDX, this->result, op[0]); 341 break; 342 case ir_unop_dFdy: 343 emit(FS_OPCODE_DDY, this->result, op[0]); 344 break; 345 346 case ir_binop_add: 347 emit(BRW_OPCODE_ADD, this->result, op[0], op[1]); 348 break; 349 case ir_binop_sub: 350 assert(!"not 
reached: should be handled by ir_sub_to_add_neg"); 351 break; 352 353 case ir_binop_mul: 354 if (ir->type->is_integer()) { 355 /* For integer multiplication, the MUL uses the low 16 bits 356 * of one of the operands (src0 on gen6, src1 on gen7). The 357 * MACH accumulates in the contribution of the upper 16 bits 358 * of that operand. 359 * 360 * FINISHME: Emit just the MUL if we know an operand is small 361 * enough. 362 */ 363 if (intel->gen >= 7 && c->dispatch_width == 16) 364 fail("16-wide explicit accumulator operands unsupported\n"); 365 366 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); 367 368 emit(BRW_OPCODE_MUL, acc, op[0], op[1]); 369 emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]); 370 emit(BRW_OPCODE_MOV, this->result, fs_reg(acc)); 371 } else { 372 emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); 373 } 374 break; 375 case ir_binop_div: 376 if (intel->gen >= 7 && c->dispatch_width == 16) 377 fail("16-wide INTDIV unsupported\n"); 378 379 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ 380 assert(ir->type->is_integer()); 381 emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); 382 break; 383 case ir_binop_mod: 384 if (intel->gen >= 7 && c->dispatch_width == 16) 385 fail("16-wide INTDIV unsupported\n"); 386 387 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ 388 assert(ir->type->is_integer()); 389 emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); 390 break; 391 392 case ir_binop_less: 393 case ir_binop_greater: 394 case ir_binop_lequal: 395 case ir_binop_gequal: 396 case ir_binop_equal: 397 case ir_binop_all_equal: 398 case ir_binop_nequal: 399 case ir_binop_any_nequal: 400 temp = this->result; 401 /* original gen4 does implicit conversion before comparison. 
*/ 402 if (intel->gen < 5) 403 temp.type = op[0].type; 404 405 resolve_ud_negate(&op[0]); 406 resolve_ud_negate(&op[1]); 407 408 resolve_bool_comparison(ir->operands[0], &op[0]); 409 resolve_bool_comparison(ir->operands[1], &op[1]); 410 411 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 412 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 413 break; 414 415 case ir_binop_logic_xor: 416 emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); 417 break; 418 419 case ir_binop_logic_or: 420 emit(BRW_OPCODE_OR, this->result, op[0], op[1]); 421 break; 422 423 case ir_binop_logic_and: 424 emit(BRW_OPCODE_AND, this->result, op[0], op[1]); 425 break; 426 427 case ir_binop_dot: 428 case ir_unop_any: 429 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 430 break; 431 432 case ir_unop_noise: 433 assert(!"not reached: should be handled by lower_noise"); 434 break; 435 436 case ir_quadop_vector: 437 assert(!"not reached: should be handled by lower_quadop_vector"); 438 break; 439 440 case ir_unop_sqrt: 441 emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); 442 break; 443 444 case ir_unop_rsq: 445 emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); 446 break; 447 448 case ir_unop_bitcast_i2f: 449 case ir_unop_bitcast_u2f: 450 op[0].type = BRW_REGISTER_TYPE_F; 451 this->result = op[0]; 452 break; 453 case ir_unop_i2u: 454 case ir_unop_bitcast_f2u: 455 op[0].type = BRW_REGISTER_TYPE_UD; 456 this->result = op[0]; 457 break; 458 case ir_unop_u2i: 459 case ir_unop_bitcast_f2i: 460 op[0].type = BRW_REGISTER_TYPE_D; 461 this->result = op[0]; 462 break; 463 case ir_unop_i2f: 464 case ir_unop_u2f: 465 case ir_unop_f2i: 466 case ir_unop_f2u: 467 emit(BRW_OPCODE_MOV, this->result, op[0]); 468 break; 469 470 case ir_unop_b2i: 471 inst = emit(BRW_OPCODE_AND, this->result, op[0], fs_reg(1)); 472 break; 473 case ir_unop_b2f: 474 temp = fs_reg(this, glsl_type::int_type); 475 emit(BRW_OPCODE_AND, temp, op[0], fs_reg(1)); 476 emit(BRW_OPCODE_MOV, 
this->result, temp); 477 break; 478 479 case ir_unop_f2b: 480 inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)); 481 inst->conditional_mod = BRW_CONDITIONAL_NZ; 482 emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); 483 break; 484 case ir_unop_i2b: 485 assert(op[0].type == BRW_REGISTER_TYPE_D); 486 487 inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0)); 488 inst->conditional_mod = BRW_CONDITIONAL_NZ; 489 emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); 490 break; 491 492 case ir_unop_trunc: 493 emit(BRW_OPCODE_RNDZ, this->result, op[0]); 494 break; 495 case ir_unop_ceil: 496 op[0].negate = !op[0].negate; 497 inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); 498 this->result.negate = true; 499 break; 500 case ir_unop_floor: 501 inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); 502 break; 503 case ir_unop_fract: 504 inst = emit(BRW_OPCODE_FRC, this->result, op[0]); 505 break; 506 case ir_unop_round_even: 507 emit(BRW_OPCODE_RNDE, this->result, op[0]); 508 break; 509 510 case ir_binop_min: 511 resolve_ud_negate(&op[0]); 512 resolve_ud_negate(&op[1]); 513 514 if (intel->gen >= 6) { 515 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 516 inst->conditional_mod = BRW_CONDITIONAL_L; 517 } else { 518 /* Unalias the destination */ 519 this->result = fs_reg(this, ir->type); 520 521 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); 522 inst->conditional_mod = BRW_CONDITIONAL_L; 523 524 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 525 inst->predicated = true; 526 } 527 break; 528 case ir_binop_max: 529 resolve_ud_negate(&op[0]); 530 resolve_ud_negate(&op[1]); 531 532 if (intel->gen >= 6) { 533 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 534 inst->conditional_mod = BRW_CONDITIONAL_GE; 535 } else { 536 /* Unalias the destination */ 537 this->result = fs_reg(this, ir->type); 538 539 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); 540 inst->conditional_mod = BRW_CONDITIONAL_G; 541 542 
inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 543 inst->predicated = true; 544 } 545 break; 546 547 case ir_binop_pow: 548 emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); 549 break; 550 551 case ir_unop_bit_not: 552 inst = emit(BRW_OPCODE_NOT, this->result, op[0]); 553 break; 554 case ir_binop_bit_and: 555 inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]); 556 break; 557 case ir_binop_bit_xor: 558 inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); 559 break; 560 case ir_binop_bit_or: 561 inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]); 562 break; 563 564 case ir_binop_lshift: 565 inst = emit(BRW_OPCODE_SHL, this->result, op[0], op[1]); 566 break; 567 568 case ir_binop_rshift: 569 if (ir->type->base_type == GLSL_TYPE_INT) 570 inst = emit(BRW_OPCODE_ASR, this->result, op[0], op[1]); 571 else 572 inst = emit(BRW_OPCODE_SHR, this->result, op[0], op[1]); 573 break; 574 575 case ir_binop_ubo_load: 576 assert(!"not yet supported"); 577 break; 578 } 579} 580 581void 582fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 583 const glsl_type *type, bool predicated) 584{ 585 switch (type->base_type) { 586 case GLSL_TYPE_FLOAT: 587 case GLSL_TYPE_UINT: 588 case GLSL_TYPE_INT: 589 case GLSL_TYPE_BOOL: 590 for (unsigned int i = 0; i < type->components(); i++) { 591 l.type = brw_type_for_base_type(type); 592 r.type = brw_type_for_base_type(type); 593 594 if (predicated || !l.equals(r)) { 595 fs_inst *inst = emit(BRW_OPCODE_MOV, l, r); 596 inst->predicated = predicated; 597 } 598 599 l.reg_offset++; 600 r.reg_offset++; 601 } 602 break; 603 case GLSL_TYPE_ARRAY: 604 for (unsigned int i = 0; i < type->length; i++) { 605 emit_assignment_writes(l, r, type->fields.array, predicated); 606 } 607 break; 608 609 case GLSL_TYPE_STRUCT: 610 for (unsigned int i = 0; i < type->length; i++) { 611 emit_assignment_writes(l, r, type->fields.structure[i].type, 612 predicated); 613 } 614 break; 615 616 case GLSL_TYPE_SAMPLER: 617 break; 618 619 default: 
620 assert(!"not reached"); 621 break; 622 } 623} 624 625/* If the RHS processing resulted in an instruction generating a 626 * temporary value, and it would be easy to rewrite the instruction to 627 * generate its result right into the LHS instead, do so. This ends 628 * up reliably removing instructions where it can be tricky to do so 629 * later without real UD chain information. 630 */ 631bool 632fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, 633 fs_reg dst, 634 fs_reg src, 635 fs_inst *pre_rhs_inst, 636 fs_inst *last_rhs_inst) 637{ 638 /* Only attempt if we're doing a direct assignment. */ 639 if (ir->condition || 640 !(ir->lhs->type->is_scalar() || 641 (ir->lhs->type->is_vector() && 642 ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) 643 return false; 644 645 /* Make sure the last instruction generated our source reg. */ 646 fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, 647 last_rhs_inst, 648 src); 649 if (!modify) 650 return false; 651 652 /* If last_rhs_inst wrote a different number of components than our LHS, 653 * we can't safely rewrite it. 654 */ 655 if (ir->lhs->type->vector_elements != modify->regs_written()) 656 return false; 657 658 /* Success! Rewrite the instruction. 
*/ 659 modify->dst = dst; 660 661 return true; 662} 663 664void 665fs_visitor::visit(ir_assignment *ir) 666{ 667 fs_reg l, r; 668 fs_inst *inst; 669 670 /* FINISHME: arrays on the lhs */ 671 ir->lhs->accept(this); 672 l = this->result; 673 674 fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail(); 675 676 ir->rhs->accept(this); 677 r = this->result; 678 679 fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail(); 680 681 assert(l.file != BAD_FILE); 682 assert(r.file != BAD_FILE); 683 684 if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) 685 return; 686 687 if (ir->condition) { 688 emit_bool_to_cond_code(ir->condition); 689 } 690 691 if (ir->lhs->type->is_scalar() || 692 ir->lhs->type->is_vector()) { 693 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 694 if (ir->write_mask & (1 << i)) { 695 inst = emit(BRW_OPCODE_MOV, l, r); 696 if (ir->condition) 697 inst->predicated = true; 698 r.reg_offset++; 699 } 700 l.reg_offset++; 701 } 702 } else { 703 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 704 } 705} 706 707fs_inst * 708fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 709 fs_reg shadow_c, fs_reg lod, fs_reg dPdy, 710 int sampler) 711{ 712 int mlen; 713 int base_mrf = 1; 714 bool simd16 = false; 715 fs_reg orig_dst; 716 717 /* g0 header. */ 718 mlen = 1; 719 720 if (ir->shadow_comparitor) { 721 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 722 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 723 coordinate.reg_offset++; 724 } 725 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 726 mlen += 3; 727 728 if (ir->op == ir_tex) { 729 /* There's no plain shadow compare message, so we use shadow 730 * compare with a bias of 0.0. 
731 */ 732 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); 733 mlen++; 734 } else if (ir->op == ir_txb || ir->op == ir_txl) { 735 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 736 mlen++; 737 } else { 738 assert(!"Should not get here."); 739 } 740 741 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), shadow_c); 742 mlen++; 743 } else if (ir->op == ir_tex) { 744 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 745 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 746 coordinate.reg_offset++; 747 } 748 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 749 mlen += 3; 750 } else if (ir->op == ir_txd) { 751 fs_reg &dPdx = lod; 752 753 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 754 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 755 coordinate.reg_offset++; 756 } 757 /* the slots for u and v are always present, but r is optional */ 758 mlen += MAX2(ir->coordinate->type->vector_elements, 2); 759 760 /* P = u, v, r 761 * dPdx = dudx, dvdx, drdx 762 * dPdy = dudy, dvdy, drdy 763 * 764 * 1-arg: Does not exist. 765 * 766 * 2-arg: dudx dvdx dudy dvdy 767 * dPdx.x dPdx.y dPdy.x dPdy.y 768 * m4 m5 m6 m7 769 * 770 * 3-arg: dudx dvdx drdx dudy dvdy drdy 771 * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z 772 * m5 m6 m7 m8 m9 m10 773 */ 774 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { 775 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 776 dPdx.reg_offset++; 777 } 778 mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); 779 780 for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { 781 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 782 dPdy.reg_offset++; 783 } 784 mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); 785 } else if (ir->op == ir_txs) { 786 /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. 
*/ 787 simd16 = true; 788 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); 789 mlen += 2; 790 } else { 791 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 792 * instructions. We'll need to do SIMD16 here. 793 */ 794 simd16 = true; 795 assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf); 796 797 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 798 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), 799 coordinate); 800 coordinate.reg_offset++; 801 } 802 803 /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to 804 * be necessary for TXF (ld), but seems wise to do for all messages. 805 */ 806 for (int i = ir->coordinate->type->vector_elements; i < 3; i++) { 807 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)); 808 } 809 810 /* lod/bias appears after u/v/r. */ 811 mlen += 6; 812 813 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, lod.type), lod); 814 mlen++; 815 816 /* The unused upper half. */ 817 mlen++; 818 } 819 820 if (simd16) { 821 /* Now, since we're doing simd16, the return is 2 interleaved 822 * vec4s where the odd-indexed ones are junk. We'll need to move 823 * this weirdness around to the expected layout. 824 */ 825 orig_dst = dst; 826 const glsl_type *vec_type = 827 glsl_type::get_instance(ir->type->base_type, 4, 1); 828 dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2)); 829 dst.type = intel->is_g4x ? 
brw_type_for_base_type(ir->type) 830 : BRW_REGISTER_TYPE_F; 831 } 832 833 fs_inst *inst = NULL; 834 switch (ir->op) { 835 case ir_tex: 836 inst = emit(SHADER_OPCODE_TEX, dst); 837 break; 838 case ir_txb: 839 inst = emit(FS_OPCODE_TXB, dst); 840 break; 841 case ir_txl: 842 inst = emit(SHADER_OPCODE_TXL, dst); 843 break; 844 case ir_txd: 845 inst = emit(SHADER_OPCODE_TXD, dst); 846 break; 847 case ir_txs: 848 inst = emit(SHADER_OPCODE_TXS, dst); 849 break; 850 case ir_txf: 851 inst = emit(SHADER_OPCODE_TXF, dst); 852 break; 853 } 854 inst->base_mrf = base_mrf; 855 inst->mlen = mlen; 856 inst->header_present = true; 857 858 if (simd16) { 859 for (int i = 0; i < 4; i++) { 860 emit(BRW_OPCODE_MOV, orig_dst, dst); 861 orig_dst.reg_offset++; 862 dst.reg_offset += 2; 863 } 864 } 865 866 return inst; 867} 868 869/* gen5's sampler has slots for u, v, r, array index, then optional 870 * parameters like shadow comparitor or LOD bias. If optional 871 * parameters aren't present, those base slots are optional and don't 872 * need to be included in the message. 873 * 874 * We don't fill in the unnecessary slots regardless, which may look 875 * surprising in the disassembly. 876 */ 877fs_inst * 878fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 879 fs_reg shadow_c, fs_reg lod, fs_reg lod2, 880 int sampler) 881{ 882 int mlen = 0; 883 int base_mrf = 2; 884 int reg_width = c->dispatch_width / 8; 885 bool header_present = false; 886 const int vector_elements = 887 ir->coordinate ? ir->coordinate->type->vector_elements : 0; 888 889 if (ir->offset != NULL && ir->op == ir_txf) { 890 /* It appears that the ld instruction used for txf does its 891 * address bounds check before adding in the offset. To work 892 * around this, just add the integer offset to the integer texel 893 * coordinate, and don't put the offset in the header. 
894 */ 895 ir_constant *offset = ir->offset->as_constant(); 896 for (int i = 0; i < vector_elements; i++) { 897 emit(BRW_OPCODE_ADD, 898 fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), 899 coordinate, 900 offset->value.i[i]); 901 coordinate.reg_offset++; 902 } 903 } else { 904 if (ir->offset) { 905 /* The offsets set up by the ir_texture visitor are in the 906 * m1 header, so we can't go headerless. 907 */ 908 header_present = true; 909 mlen++; 910 base_mrf--; 911 } 912 913 for (int i = 0; i < vector_elements; i++) { 914 emit(BRW_OPCODE_MOV, 915 fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), 916 coordinate); 917 coordinate.reg_offset++; 918 } 919 } 920 mlen += vector_elements * reg_width; 921 922 if (ir->shadow_comparitor) { 923 mlen = MAX2(mlen, header_present + 4 * reg_width); 924 925 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), shadow_c); 926 mlen += reg_width; 927 } 928 929 fs_inst *inst = NULL; 930 switch (ir->op) { 931 case ir_tex: 932 inst = emit(SHADER_OPCODE_TEX, dst); 933 break; 934 case ir_txb: 935 mlen = MAX2(mlen, header_present + 4 * reg_width); 936 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 937 mlen += reg_width; 938 939 inst = emit(FS_OPCODE_TXB, dst); 940 break; 941 case ir_txl: 942 mlen = MAX2(mlen, header_present + 4 * reg_width); 943 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 944 mlen += reg_width; 945 946 inst = emit(SHADER_OPCODE_TXL, dst); 947 break; 948 case ir_txd: { 949 mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */ 950 951 /** 952 * P = u, v, r 953 * dPdx = dudx, dvdx, drdx 954 * dPdy = dudy, dvdy, drdy 955 * 956 * Load up these values: 957 * - dudx dudy dvdx dvdy drdx drdy 958 * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z 959 */ 960 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { 961 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 962 lod.reg_offset++; 963 mlen += reg_width; 964 965 emit(BRW_OPCODE_MOV, fs_reg(MRF, 
base_mrf + mlen), lod2); 966 lod2.reg_offset++; 967 mlen += reg_width; 968 } 969 970 inst = emit(SHADER_OPCODE_TXD, dst); 971 break; 972 } 973 case ir_txs: 974 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); 975 mlen += reg_width; 976 inst = emit(SHADER_OPCODE_TXS, dst); 977 break; 978 case ir_txf: 979 mlen = header_present + 4 * reg_width; 980 981 emit(BRW_OPCODE_MOV, 982 fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), 983 lod); 984 inst = emit(SHADER_OPCODE_TXF, dst); 985 break; 986 } 987 inst->base_mrf = base_mrf; 988 inst->mlen = mlen; 989 inst->header_present = header_present; 990 991 if (mlen > 11) { 992 fail("Message length >11 disallowed by hardware\n"); 993 } 994 995 return inst; 996} 997 998fs_inst * 999fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 1000 fs_reg shadow_c, fs_reg lod, fs_reg lod2, 1001 int sampler) 1002{ 1003 int mlen = 0; 1004 int base_mrf = 2; 1005 int reg_width = c->dispatch_width / 8; 1006 bool header_present = false; 1007 int offsets[3]; 1008 1009 if (ir->offset && ir->op != ir_txf) { 1010 /* The offsets set up by the ir_texture visitor are in the 1011 * m1 header, so we can't go headerless. 
1012 */ 1013 header_present = true; 1014 mlen++; 1015 base_mrf--; 1016 } 1017 1018 if (ir->shadow_comparitor) { 1019 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), shadow_c); 1020 mlen += reg_width; 1021 } 1022 1023 /* Set up the LOD info */ 1024 switch (ir->op) { 1025 case ir_tex: 1026 break; 1027 case ir_txb: 1028 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 1029 mlen += reg_width; 1030 break; 1031 case ir_txl: 1032 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 1033 mlen += reg_width; 1034 break; 1035 case ir_txd: { 1036 if (c->dispatch_width == 16) 1037 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); 1038 1039 /* Load dPdx and the coordinate together: 1040 * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z 1041 */ 1042 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1043 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); 1044 coordinate.reg_offset++; 1045 mlen += reg_width; 1046 1047 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod); 1048 lod.reg_offset++; 1049 mlen += reg_width; 1050 1051 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), lod2); 1052 lod2.reg_offset++; 1053 mlen += reg_width; 1054 } 1055 break; 1056 } 1057 case ir_txs: 1058 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); 1059 mlen += reg_width; 1060 break; 1061 case ir_txf: 1062 /* It appears that the ld instruction used for txf does its 1063 * address bounds check before adding in the offset. To work 1064 * around this, just add the integer offset to the integer texel 1065 * coordinate, and don't put the offset in the header. 1066 */ 1067 if (ir->offset) { 1068 ir_constant *offset = ir->offset->as_constant(); 1069 offsets[0] = offset->value.i[0]; 1070 offsets[1] = offset->value.i[1]; 1071 offsets[2] = offset->value.i[2]; 1072 } else { 1073 memset(offsets, 0, sizeof(offsets)); 1074 } 1075 1076 /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. 
       */
      emit(BRW_OPCODE_ADD,
           fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[0]);
      coordinate.reg_offset++;
      mlen += reg_width;

      /* The LD message wants the LOD between u and v, not at the end. */
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod);
      mlen += reg_width;

      for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
         emit(BRW_OPCODE_ADD,
              fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[i]);
         coordinate.reg_offset++;
         mlen += reg_width;
      }
      break;
   }

   /* Set up the coordinate (except for cases where it was done above) */
   if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate);
         coordinate.reg_offset++;
         mlen += reg_width;
      }
   }

   /* Generate the SEND */
   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
   case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
   case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
   case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
   case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
   case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}

/**
 * Emit code to produce the coordinates for a texture lookup.
 *
 * Returns the fs_reg containing the texture coordinate (as opposed to
 * setting this->result).  Returns a default (BAD_FILE) fs_reg when the
 * texture operation has no coordinate (e.g. txs).
 */
fs_reg
fs_visitor::emit_texcoord(ir_texture *ir, int sampler)
{
   fs_inst *inst = NULL;

   if (!ir->coordinate)
      return fs_reg(); /* Return the default BAD_FILE register. */

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   bool needs_gl_clamp = true;

   fs_reg scale_x, scale_y;

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    *
    * The scale uniforms are also needed on gen6+ when any coordinate
    * channel of a rectangle sampler uses GL_CLAMP (see the clamp
    * handling below, which inverts them back to width/height).
    */
   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT &&
       (intel->gen < 6 ||
        (intel->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) ||
                             c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) {
      struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
      int tokens[STATE_LENGTH] = {
         STATE_INTERNAL,
         STATE_TEXRECT_SCALE,
         sampler,
         0,
         0
      };

      if (c->dispatch_width == 16) {
         fail("rectangle scale uniform setup not supported on 16-wide\n");
         return fs_reg(this, ir->type);
      }

      /* Two consecutive uniform slots: 1/width then 1/height. */
      scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
      scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);

      GLuint index = _mesa_add_state_reference(params,
                                               (gl_state_index *)tokens);

      this->param_index[c->prog_data.nr_params] = index;
      this->param_offset[c->prog_data.nr_params] = 0;
      c->prog_data.nr_params++;
      this->param_index[c->prog_data.nr_params] = index;
      this->param_offset[c->prog_data.nr_params] = 1;
      c->prog_data.nr_params++;
   }

   /* On pre-gen6 hardware the EU must normalize rectangle texture
    * coordinates itself, using the TEXRECT_SCALE factors set up above.
    */
   if (intel->gen < 6 &&
       ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
      fs_reg dst = fs_reg(this, ir->coordinate->type);
      fs_reg src = coordinate;
      coordinate = dst;

      emit(BRW_OPCODE_MUL, dst, src, scale_x);
      dst.reg_offset++;
      src.reg_offset++;
      emit(BRW_OPCODE_MUL, dst, src, scale_y);
   } else if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
      /* On gen6+, the sampler handles the rectangle coordinates
       * natively, without needing rescaling.  But that means we have
       * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
       * not [0, 1] like the default case below.
       */
      needs_gl_clamp = false;

      for (int i = 0; i < 2; i++) {
         if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
            fs_reg chan = coordinate;
            chan.reg_offset += i;

            /* chan = max(chan, 0.0) */
            inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0));
            inst->conditional_mod = BRW_CONDITIONAL_G;

            /* Our parameter comes in as 1.0/width or 1.0/height,
             * because that's what people normally want for doing
             * texture rectangle handling.  We need width or height
             * for clamping, but we don't care enough to make a new
             * parameter type, so just invert back.
             */
            fs_reg limit = fs_reg(this, glsl_type::float_type);
            emit(BRW_OPCODE_MOV, limit, i == 0 ? scale_x : scale_y);
            emit(SHADER_OPCODE_RCP, limit, limit);

            /* chan = min(chan, width or height) */
            inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
            inst->conditional_mod = BRW_CONDITIONAL_L;
         }
      }
   }

   /* Default case: GL_CLAMP is saturate to [0, 1] on up to 3 channels. */
   if (ir->coordinate && needs_gl_clamp) {
      for (unsigned int i = 0;
           i < MIN2(ir->coordinate->type->vector_elements, 3); i++) {
         if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
            fs_reg chan = coordinate;
            chan.reg_offset += i;

            fs_inst *inst = emit(BRW_OPCODE_MOV, chan, chan);
            inst->saturate = true;
         }
      }
   }
   return coordinate;
}

/**
 * Visit a texture IR node: evaluate the coordinate, shadow comparitor and
 * LOD/gradient operands, emit the per-generation sampler message, and apply
 * result swizzling.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   fs_inst *inst = NULL;

   int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &fp->Base);
   sampler = fp->Base.SamplerUnits[sampler];

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   /* Generate code to compute all the subexpression trees.  This has to be
    * done before loading any values into MRFs for the sampler message since
    * generating these values may involve SEND messages that need the MRFs.
    */
   fs_reg coordinate = emit_texcoord(ir, sampler);

   fs_reg shadow_comparitor;
   if (ir->shadow_comparitor) {
      ir->shadow_comparitor->accept(this);
      shadow_comparitor = this->result;
   }

   fs_reg lod, lod2;
   switch (ir->op) {
   case ir_tex:
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      lod = this->result;
      break;
   case ir_txd:
      /* lod holds dPdx, lod2 holds dPdy. */
      ir->lod_info.grad.dPdx->accept(this);
      lod = this->result;

      ir->lod_info.grad.dPdy->accept(this);
      lod2 = this->result;
      break;
   case ir_txf:
   case ir_txl:
   case ir_txs:
      ir->lod_info.lod->accept(this);
      lod = this->result;
      break;
   };

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));

   if (intel->gen >= 7) {
      inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2, sampler);
   } else if (intel->gen >= 5) {
      inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2, sampler);
   } else {
      inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2, sampler);
   }

   /* The header is set up by generate_tex() when necessary. */
   inst->src[0] = reg_undef;

   /* txf folds the offset into the texel coordinate instead (see above). */
   if (ir->offset != NULL && ir->op != ir_txf)
      inst->texture_offset = brw_texture_offset(ir->offset->as_constant());

   inst->sampler = sampler;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   swizzle_result(ir, dst, sampler);
}

/**
 * Swizzle the result of a texture result.  This is necessary for
 * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
 */
void
fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler)
{
   this->result = orig_val;

   /* txs returns integer sizes; no swizzling applies. */
   if (ir->op == ir_txs)
      return;

   if (ir->type == glsl_type::float_type) {
      /* Ignore DEPTH_TEXTURE_MODE swizzling. */
      assert(ir->sampler->type->sampler_shadow);
   } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) {
      fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
         int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i);
         fs_reg l = swizzled_result;
         l.reg_offset += i;

         if (swiz == SWIZZLE_ZERO) {
            emit(BRW_OPCODE_MOV, l, fs_reg(0.0f));
         } else if (swiz == SWIZZLE_ONE) {
            emit(BRW_OPCODE_MOV, l, fs_reg(1.0f));
         } else {
            fs_reg r = orig_val;
            r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i);
            emit(BRW_OPCODE_MOV, l, r);
         }
      }
      this->result = swizzled_result;
   }
}

/**
 * Visit a swizzle: either adjust the register offset (single channel) or
 * emit per-channel MOVs into a fresh register.
 */
void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      /* Single-channel swizzle: just point at the right component. */
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(BRW_OPCODE_MOV, result, channel);
      result.reg_offset++;
   }
}

/**
 * Visit a discard statement.  Conditional discards are expected to have
 * been lowered into an if-block already (hence the assert).
 */
void
fs_visitor::visit(ir_discard *ir)
{
   assert(ir->condition == NULL); /* FINISHME */

   emit(FS_OPCODE_DISCARD);
}

/**
 * Visit an IR constant: materialize its values into a freshly allocated
 * register and leave that register in this->result.
 */
void
fs_visitor::visit(ir_constant *ir)
{
   /* Set this->result to reg at the bottom of the function because some code
    * paths will cause this visitor to be applied to other fields.  This will
    * cause the value stored in this->result to be modified.
    *
    * Make reg constant so that it doesn't get accidentally modified along the
    * way.  Yes, I actually had this problem. :(
    */
   const fs_reg reg(this, ir->type);
   fs_reg dst_reg = reg;

   if (ir->type->is_array()) {
      /* Arrays: visit each element and copy it into consecutive registers. */
      const unsigned size = type_size(ir->type->fields.array);

      for (unsigned i = 0; i < ir->type->length; i++) {
         ir->array_elements[i]->accept(this);
         fs_reg src_reg = this->result;

         dst_reg.type = src_reg.type;
         for (unsigned j = 0; j < size; j++) {
            emit(BRW_OPCODE_MOV, dst_reg, src_reg);
            src_reg.reg_offset++;
            dst_reg.reg_offset++;
         }
      }
   } else if (ir->type->is_record()) {
      /* Structs: like arrays, but each field carries its own size. */
      foreach_list(node, &ir->components) {
         ir_constant *const field = (ir_constant *) node;
         const unsigned size = type_size(field->type);

         field->accept(this);
         fs_reg src_reg = this->result;

         dst_reg.type = src_reg.type;
         for (unsigned j = 0; j < size; j++) {
            emit(BRW_OPCODE_MOV, dst_reg, src_reg);
            src_reg.reg_offset++;
            dst_reg.reg_offset++;
         }
      }
   } else {
      /* Scalars/vectors: one immediate MOV per component. */
      const unsigned size = type_size(ir->type);

      for (unsigned i = 0; i < size; i++) {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]));
            break;
         case GLSL_TYPE_UINT:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]));
            break;
         case GLSL_TYPE_INT:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]));
            break;
         case GLSL_TYPE_BOOL:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]));
            break;
         default:
            assert(!"Non-float/uint/int/bool constant");
         }
         dst_reg.reg_offset++;
      }
   }

   this->result = reg;
}

/**
 * Emit instructions that evaluate a boolean rvalue and leave its truth
 * value in the flag register via a conditional mod, ready for a predicated
 * instruction (IF, SEL, ...).
 *
 * Expressions whose top-level operation can't set the flag directly
 * (logic and/or/xor) fall through to the generic path at "out", which
 * evaluates the whole rvalue and ANDs it with 1.
 */
void
fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;

         resolve_ud_negate(&op[i]);
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* !b: flag is Z of (b & 1). */
         inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
      case ir_binop_logic_or:
      case ir_binop_logic_and:
         /* No single flag-setting instruction; use the generic path. */
         goto out;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f));
         } else {
            inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0));
         } else {
            inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         resolve_bool_comparison(expr->operands[0], &op[0]);
         resolve_bool_comparison(expr->operands[1], &op[1]);

         inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      default:
         assert(!"not reached");
         fail("bad cond code\n");
         break;
      }
      return;
   }

out:
   ir->accept(this);

   fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}

/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
fs_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;
      fs_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         return;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_or:
         /* OR the operands first, then IF on the result being nonzero. */
         temp = fs_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_OR, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_and:
         temp = fs_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_AND, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_f2b:
         inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_i2b:
         inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         /* Fold the comparison directly into the IF. */
         inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         return;
      default:
         assert(!"not reached");
         /* Keep going with a plausible IF so compilation can report the
          * failure instead of crashing.
          */
         inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         fail("bad condition\n");
         return;
      }
      return;
   }

   /* Non-expression condition: evaluate it and IF on nonzero. */
   ir->condition->accept(this);

   fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}

/**
 * Visit an if statement: emit IF/ELSE/ENDIF around the two branches.
 * On gen6 the condition is folded into the IF; elsewhere the condition
 * sets the flag register and the IF is predicated.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   if (intel->gen < 6 && c->dispatch_width == 16) {
      fail("Can't support (non-uniform) control flow on 16-wide\n");
   }

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);

      inst = emit(BRW_OPCODE_IF);
      inst->predicated = true;
   }

   foreach_list(node, &ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)node;
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(BRW_OPCODE_ELSE);

      foreach_list(node, &ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)node;
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(BRW_OPCODE_ENDIF);
}

/**
 * Visit a loop: emit DO/WHILE around the body, with optional counter
 * initialization ("from"), exit test ("to"/"cmp") and increment.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (intel->gen < 6 && c->dispatch_width == 16) {
      fail("Can't support (non-uniform) control flow on 16-wide\n");
   }

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(BRW_OPCODE_MOV, counter, this->result);
      }
   }

   this->base_ir = NULL;
   emit(BRW_OPCODE_DO);

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      /* Compare the counter against the bound, then BREAK predicated on
       * the comparison result.
       */
      fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result);
      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);

      inst = emit(BRW_OPCODE_BREAK);
      inst->predicated = true;
   }

   foreach_list(node, &ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)node;

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(BRW_OPCODE_ADD, counter, counter, this->result);
   }

   this->base_ir = NULL;
   emit(BRW_OPCODE_WHILE);
}

/** Visit a break/continue inside a loop. */
void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}

/* Function calls are expected to have been inlined before reaching the
 * backend, so these visitors are unimplemented.
 */
void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_list(node, &sig->body) {
         ir_instruction *ir = (ir_instruction *)node;
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   /* Signatures are only reached through ir_function, never directly. */
   assert(!"not reached");
   (void)ir;
}

/**
 * Append a copy of inst to the instruction list, tagging it with the
 * current annotation, the IR being translated, and any force-uncompressed/
 * force-sechalf state pushed by the caller.
 */
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   if (force_uncompressed_stack > 0)
      list_inst->force_uncompressed = true;
   else if (force_sechalf_stack > 0)
      list_inst->force_sechalf = true;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   int reg_width = c->dispatch_width / 8;

   /* Everyone's favorite color. */
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f));
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f));
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f));
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f));

   fs_inst *write;
   write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
   write->base_mrf = 2;
   write->mlen = 4 * reg_width;
   write->eot = true;
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   /* Two varying slots per register; odd channels live in the upper half. */
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;

   emit(FS_OPCODE_PIXEL_X, this->pixel_x);
   emit(FS_OPCODE_PIXEL_Y, this->pixel_y);

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      /* PLN wants delta_x and delta_y adjacent, so allocate them as one
       * vec2 with delta_y at the following reg_offset.
       */
      this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::vec2_type);
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++;
   } else {
      this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::float_type);
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::float_type);
   }
   emit(BRW_OPCODE_ADD, this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))));
   emit(BRW_OPCODE_ADD, this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(FS_OPCODE_LINTERP, wpos_w,
        this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        interp_reg(FRAG_ATTRIB_WPOS, 3));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(BRW_OPCODE_ADD,
        int_pixel_x,
        fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
        fs_reg(brw_imm_v(0x10101010)));
   emit(BRW_OPCODE_ADD,
        int_pixel_y,
        fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
        fs_reg(brw_imm_v(0x11001100)));

   /* As of gen6, we can no longer mix float and int sources.  We have
    * to turn the integer pixel centers into floats for their actual
    * use.
    */
   this->pixel_x = fs_reg(this, glsl_type::float_type);
   this->pixel_y = fs_reg(this, glsl_type::float_type);
   emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x);
   emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y);

   this->current_annotation = "compute pos.w";
   /* W is delivered in the payload on gen6; just compute 1/W from it. */
   this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);

   /* The barycentric deltas come directly from payload registers. */
   for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
      uint8_t reg = c->barycentric_coord_reg[i];
      this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
      this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
   }

   this->current_annotation = NULL;
}

/**
 * Move one color component (index) of render target 'target' into the
 * FB-write MRF payload starting at first_color_mrf, honoring the payload
 * layout differences between SIMD8, gen6 SIMD16 and pre-gen6 SIMD16.
 */
void
fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
{
   int reg_width = c->dispatch_width / 8;
   fs_inst *inst;
   fs_reg color = outputs[target];
   fs_reg mrf;

   /* If there's no color data to be written, skip it.
    */
   if (color.file == BAD_FILE)
      return;

   color.reg_offset += index;

   if (c->dispatch_width == 8 || intel->gen >= 6) {
      /* SIMD8 write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       *
       * gen6 SIMD16 DP write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       * m + 4: b0
       * m + 5: b1
       * m + 6: a0
       * m + 7: a1
       */
      inst = emit(BRW_OPCODE_MOV,
                  fs_reg(MRF, first_color_mrf + index * reg_width, color.type),
                  color);
      inst->saturate = c->key.clamp_fragment_color;
   } else {
      /* pre-gen6 SIMD16 single source DP write looks like:
       * m + 0: r0
       * m + 1: g0
       * m + 2: b0
       * m + 3: a0
       * m + 4: r1
       * m + 5: g1
       * m + 6: b1
       * m + 7: a1
       */
      if (brw->has_compr4) {
         /* By setting the high bit of the MRF register number, we
          * indicate that we want COMPR4 mode - instead of doing the
          * usual destination + 1 for the second half we get
          * destination + 4.
          */
         inst = emit(BRW_OPCODE_MOV,
                     fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
                            color.type),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
      } else {
         /* No COMPR4: emit the two SIMD8 halves separately, with the
          * second half writing 4 MRFs further along.
          */
         push_force_uncompressed();
         inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index,
                                            color.type),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_uncompressed();

         push_force_sechalf();
         color.sechalf = true;
         inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4,
                                            color.type),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_sechalf();
         color.sechalf = false;
      }
   }
}

/**
 * Emit the framebuffer-write messages that terminate the shader: build the
 * MRF payload (header, AA/stencil, colors, depth) and emit one FB write per
 * enabled render target, with EOT on the last one.
 */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   bool header_present = true;
   /* We can potentially have a message length of up to 15, so we have to set
    * base_mrf to either 0 or 1 in order to fit in m0..m15.
    */
   int base_mrf = 1;
   int nr = base_mrf;
   int reg_width = c->dispatch_width / 8;
   bool do_dual_src = this->dual_src_output.file != BAD_FILE;

   if (c->dispatch_width == 16 && do_dual_src) {
      fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
      do_dual_src = false;
   }

   /* From the Sandy Bridge PRM, volume 4, page 198:
    *
    *     "Dispatched Pixel Enables. One bit per pixel indicating
    *      which pixels were originally enabled when the thread was
    *      dispatched. This field is only required for the end-of-
    *      thread message and on all dual-source messages."
    */
   if (intel->gen >= 6 &&
       !this->fp->UsesKill &&
       !do_dual_src &&
       c->key.nr_color_regions == 1) {
      header_present = false;
   }

   if (header_present) {
      /* m2, m3 header */
      nr += 2;
   }

   if (c->aa_dest_stencil_reg) {
      push_force_uncompressed();
      emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
           fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)));
      pop_force_uncompressed();
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4 * reg_width;
   if (do_dual_src)
      nr += 4;

   if (c->source_depth_to_render_target) {
      if (intel->gen == 6 && c->dispatch_width == 16) {
         /* For outputting oDepth on gen6, SIMD8 writes have to be
          * used.  This would require 8-wide moves of each half to
          * message regs, kind of like pre-gen5 SIMD16 FB writes.
          * Just bail on doing so for now.
          */
         fail("Missing support for simd16 depth writes on gen6\n");
      }

      if (c->computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth);
      } else {
         /* Pass through the payload depth. */
         emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
              fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
      }
      nr += reg_width;
   }

   if (c->dest_depth_reg) {
      emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
           fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)));
      nr += reg_width;
   }

   if (do_dual_src) {
      /* Dual-source blend: a single FB write carrying both src0 and src1
       * colors for render target 0.
       */
      fs_reg src0 = this->outputs[0];
      fs_reg src1 = this->dual_src_output;

      this->current_annotation = ralloc_asprintf(this->mem_ctx,
						 "FB write src0");
      for (int i = 0; i < 4; i++) {
	 fs_inst *inst = emit(BRW_OPCODE_MOV,
			      fs_reg(MRF, color_mrf + i, src0.type),
			      src0);
	 src0.reg_offset++;
	 inst->saturate = c->key.clamp_fragment_color;
      }

      this->current_annotation = ralloc_asprintf(this->mem_ctx,
						 "FB write src1");
      for (int i = 0; i < 4; i++) {
	 fs_inst *inst = emit(BRW_OPCODE_MOV,
			      fs_reg(MRF, color_mrf + 4 + i, src1.type),
			      src1);
	 src1.reg_offset++;
	 inst->saturate = c->key.clamp_fragment_color;
      }

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->target = 0;
      inst->base_mrf = base_mrf;
      inst->mlen = nr - base_mrf;
      inst->eot = true;
      inst->header_present = header_present;

      c->prog_data.dual_src_blend = true;
      this->current_annotation = NULL;
      return;
   }

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = ralloc_asprintf(this->mem_ctx,
						 "FB write target %d",
						 target);
      for (unsigned i = 0; i < this->output_components[target]; i++)
	 emit_color_write(target, i, color_mrf);

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->target = target;
      inst->base_mrf = base_mrf;
      inst->mlen = nr - base_mrf;
      if (target == c->key.nr_color_regions - 1)
	 inst->eot = true;
      inst->header_present = header_present;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even if there's no color buffers enabled, we still need to send
       * alpha out the pipeline to our null renderbuffer to support
       * alpha-testing, alpha-to-coverage, and so on.
       */
      emit_color_write(0, 3, color_mrf);

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = nr - base_mrf;
      inst->eot = true;
      inst->header_present = header_present;
   }

   this->current_annotation = NULL;
}

/**
 * If *reg is an unsigned-integer value carrying a negate modifier, replace
 * it with a temporary holding the resolved (MOVed) value so later uses
 * never see a negated UD source.
 */
void
fs_visitor::resolve_ud_negate(fs_reg *reg)
{
   if (reg->type != BRW_REGISTER_TYPE_UD ||
       !reg->negate)
      return;

   fs_reg temp = fs_reg(this, glsl_type::uint_type);
   emit(BRW_OPCODE_MOV, temp, *reg);
   *reg = temp;
}

/**
 * If rvalue is a boolean, mask *reg down to its low bit (0 or 1) in a
 * temporary before it is used in a comparison.
 */
void
fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
{
   if (rvalue->type != glsl_type::bool_type)
      return;

   fs_reg temp = fs_reg(this, glsl_type::bool_type);
   emit(BRW_OPCODE_AND, temp, *reg, fs_reg(1));
   *reg = temp;
}

/**
 * Construct the visitor: cache pointers to the compile state, the linked
 * fragment program and the hardware context, and reset all per-compile
 * bookkeeping (virtual GRFs, annotations, compression stacks).
 */
fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,
                       struct brw_shader *shader)
{
   this->c = c;
   this->p = &c->func;
   this->brw = p->brw;
   this->fp = (struct gl_fragment_program *)
      prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
   this->prog = prog;
   this->intel = &brw->intel;
   this->ctx = &intel->ctx;
   this->mem_ctx = ralloc_context(NULL);
   this->shader = shader;
   this->failed = false;
   this->variable_ht = hash_table_ctor(0,
				       hash_table_pointer_hash,
				       hash_table_pointer_compare);

   /* There's a question that appears to be left open in the spec:
    * How do implicit dst conversions interact with the CMP
    * instruction or conditional mods?  On gen6, the instruction:
    *
    * CMP null<d> src0<f> src1<f>
    *
    * will do src1 - src0 and compare that result as if it was an
    * integer.  On gen4, it will do src1 - src0 as float, convert
    * the result to int, and compare as int.  In between, it
    * appears that it does src1 - src0 and does the compare in the
    * execution type so dst type doesn't matter.
    */
   if (this->intel->gen > 4)
      this->reg_null_cmp = reg_null_d;
   else
      this->reg_null_cmp = reg_null_f;

   this->frag_depth = NULL;
   memset(this->outputs, 0, sizeof(this->outputs));
   this->first_non_payload_grf = 0;
   this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;

   this->current_annotation = NULL;
   this->base_ir = NULL;

   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_array_size = 0;
   this->virtual_grf_def = NULL;
   this->virtual_grf_use = NULL;
   this->live_intervals_valid = false;

   this->force_uncompressed_stack = 0;
   this->force_sechalf_stack = 0;
}

fs_visitor::~fs_visitor()
{
   /* mem_ctx owns all instructions and registers allocated during the
    * compile; the variable hash table is managed separately.
    */
   ralloc_free(this->mem_ctx);
   hash_table_dtor(this->variable_ht);
}