brw_fs_visitor.cpp revision cc44aa77490e1360b099eb0b887266f434298b4f
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file brw_fs_visitor.cpp 25 * 26 * This file supports generating the FS LIR from the GLSL IR. The LIR 27 * makes it easier to do backend-specific optimizations than doing so 28 * in the GLSL IR or in the native code. 29 */ 30extern "C" { 31 32#include <sys/types.h> 33 34#include "main/macros.h" 35#include "main/shaderobj.h" 36#include "main/uniforms.h" 37#include "program/prog_parameter.h" 38#include "program/prog_print.h" 39#include "program/prog_optimize.h" 40#include "program/register_allocate.h" 41#include "program/sampler.h" 42#include "program/hash_table.h" 43#include "brw_context.h" 44#include "brw_eu.h" 45#include "brw_wm.h" 46} 47#include "brw_shader.h" 48#include "brw_fs.h" 49#include "glsl/glsl_types.h" 50#include "glsl/ir_optimization.h" 51#include "glsl/ir_print_visitor.h" 52 53void 54fs_visitor::visit(ir_variable *ir) 55{ 56 fs_reg *reg = NULL; 57 58 if (variable_storage(ir)) 59 return; 60 61 if (ir->mode == ir_var_in) { 62 if (!strcmp(ir->name, "gl_FragCoord")) { 63 reg = emit_fragcoord_interpolation(ir); 64 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 65 reg = emit_frontfacing_interpolation(ir); 66 } else { 67 reg = emit_general_interpolation(ir); 68 } 69 assert(reg); 70 hash_table_insert(this->variable_ht, reg, ir); 71 return; 72 } else if (ir->mode == ir_var_out) { 73 reg = new(this->mem_ctx) fs_reg(this, ir->type); 74 75 if (ir->index > 0) { 76 assert(ir->location == FRAG_RESULT_DATA0); 77 assert(ir->index == 1); 78 this->dual_src_output = *reg; 79 } else if (ir->location == FRAG_RESULT_COLOR) { 80 /* Writing gl_FragColor outputs to all color regions. */ 81 for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { 82 this->outputs[i] = *reg; 83 this->output_components[i] = 4; 84 } 85 } else if (ir->location == FRAG_RESULT_DEPTH) { 86 this->frag_depth = ir; 87 } else { 88 /* gl_FragData or a user-defined FS output */ 89 assert(ir->location >= FRAG_RESULT_DATA0 && 90 ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); 91 92 int vector_elements = 93 ir->type->is_array() ? ir->type->fields.array->vector_elements 94 : ir->type->vector_elements; 95 96 /* General color output. */ 97 for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) { 98 int output = ir->location - FRAG_RESULT_DATA0 + i; 99 this->outputs[output] = *reg; 100 this->outputs[output].reg_offset += vector_elements * i; 101 this->output_components[output] = vector_elements; 102 } 103 } 104 } else if (ir->mode == ir_var_uniform) { 105 int param_index = c->prog_data.nr_params; 106 107 if (c->dispatch_width == 16) { 108 if (!variable_storage(ir)) { 109 fail("Failed to find uniform '%s' in 16-wide\n", ir->name); 110 } 111 return; 112 } 113 114 if (!strncmp(ir->name, "gl_", 3)) { 115 setup_builtin_uniform_values(ir); 116 } else { 117 setup_uniform_values(ir->location, ir->type); 118 } 119 120 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 121 reg->type = brw_type_for_base_type(ir->type); 122 } 123 124 if (!reg) 125 reg = new(this->mem_ctx) fs_reg(this, ir->type); 126 127 hash_table_insert(this->variable_ht, reg, ir); 128} 129 130void 131fs_visitor::visit(ir_dereference_variable *ir) 132{ 133 fs_reg *reg = variable_storage(ir->var); 134 this->result = *reg; 135} 136 137void 138fs_visitor::visit(ir_dereference_record *ir) 139{ 140 const glsl_type *struct_type = ir->record->type; 141 142 ir->record->accept(this); 143 144 unsigned int offset = 0; 145 for (unsigned int i = 0; i < struct_type->length; i++) { 146 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 147 break; 148 offset += type_size(struct_type->fields.structure[i].type); 149 } 150 this->result.reg_offset += offset; 151 this->result.type = brw_type_for_base_type(ir->type); 152} 153 154void 155fs_visitor::visit(ir_dereference_array *ir) 156{ 157 ir_constant *index; 158 int element_size; 159 160 ir->array->accept(this); 161 index = ir->array_index->as_constant(); 162 163 element_size = type_size(ir->type); 164 this->result.type = brw_type_for_base_type(ir->type); 165 166 if (index) { 167 assert(this->result.file == UNIFORM || this->result.file == GRF); 168 this->result.reg_offset += index->value.i[0] * element_size; 169 } else { 170 assert(!"FINISHME: non-constant array element"); 171 } 172} 173 174/* Instruction selection: Produce a MOV.sat instead of 175 * MIN(MAX(val, 0), 1) when possible. 176 */ 177bool 178fs_visitor::try_emit_saturate(ir_expression *ir) 179{ 180 ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); 181 182 if (!sat_val) 183 return false; 184 185 fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail(); 186 187 sat_val->accept(this); 188 fs_reg src = this->result; 189 190 fs_inst *last_inst = (fs_inst *) this->instructions.get_tail(); 191 192 /* If the last instruction from our accept() didn't generate our 193 * src, generate a saturated MOV 194 */ 195 fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); 196 if (!modify || modify->regs_written() != 1) { 197 fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src); 198 inst->saturate = true; 199 } else { 200 modify->saturate = true; 201 this->result = src; 202 } 203 204 205 return true; 206} 207 208bool 209fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) 210{ 211 /* 3-src instructions were introduced in gen6. */ 212 if (intel->gen < 6) 213 return false; 214 215 /* MAD can only handle floating-point data. */ 216 if (ir->type != glsl_type::float_type) 217 return false; 218 219 ir_rvalue *nonmul = ir->operands[1 - mul_arg]; 220 ir_expression *mul = ir->operands[mul_arg]->as_expression(); 221 222 if (!mul || mul->operation != ir_binop_mul) 223 return false; 224 225 if (nonmul->as_constant() || 226 mul->operands[0]->as_constant() || 227 mul->operands[1]->as_constant()) 228 return false; 229 230 nonmul->accept(this); 231 fs_reg src0 = this->result; 232 233 mul->operands[0]->accept(this); 234 fs_reg src1 = this->result; 235 236 mul->operands[1]->accept(this); 237 fs_reg src2 = this->result; 238 239 this->result = fs_reg(this, ir->type); 240 emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); 241 242 return true; 243} 244 245void 246fs_visitor::visit(ir_expression *ir) 247{ 248 unsigned int operand; 249 fs_reg op[2], temp; 250 fs_inst *inst; 251 252 assert(ir->get_num_operands() <= 2); 253 254 if (try_emit_saturate(ir)) 255 return; 256 if (ir->operation == ir_binop_add) { 257 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1)) 258 return; 259 } 260 261 for (operand = 0; operand < ir->get_num_operands(); operand++) { 262 ir->operands[operand]->accept(this); 263 if (this->result.file == BAD_FILE) { 264 ir_print_visitor v; 265 fail("Failed to get tree for expression operand:\n"); 266 ir->operands[operand]->accept(&v); 267 } 268 op[operand] = this->result; 269 270 /* Matrix expression operands should have been broken down to vector 271 * operations already. 272 */ 273 assert(!ir->operands[operand]->type->is_matrix()); 274 /* And then those vector operands should have been broken down to scalar. 275 */ 276 assert(!ir->operands[operand]->type->is_vector()); 277 } 278 279 /* Storage for our result. If our result goes into an assignment, it will 280 * just get copy-propagated out, so no worries. 281 */ 282 this->result = fs_reg(this, ir->type); 283 284 switch (ir->operation) { 285 case ir_unop_logic_not: 286 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 287 * ones complement of the whole register, not just bit 0. 288 */ 289 emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)); 290 break; 291 case ir_unop_neg: 292 op[0].negate = !op[0].negate; 293 this->result = op[0]; 294 break; 295 case ir_unop_abs: 296 op[0].abs = true; 297 op[0].negate = false; 298 this->result = op[0]; 299 break; 300 case ir_unop_sign: 301 temp = fs_reg(this, ir->type); 302 303 emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)); 304 305 inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); 306 inst->conditional_mod = BRW_CONDITIONAL_G; 307 inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)); 308 inst->predicated = true; 309 310 inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); 311 inst->conditional_mod = BRW_CONDITIONAL_L; 312 inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)); 313 inst->predicated = true; 314 315 break; 316 case ir_unop_rcp: 317 emit_math(SHADER_OPCODE_RCP, this->result, op[0]); 318 break; 319 320 case ir_unop_exp2: 321 emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); 322 break; 323 case ir_unop_log2: 324 emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); 325 break; 326 case ir_unop_exp: 327 case ir_unop_log: 328 assert(!"not reached: should be handled by ir_explog_to_explog2"); 329 break; 330 case ir_unop_sin: 331 case ir_unop_sin_reduced: 332 emit_math(SHADER_OPCODE_SIN, this->result, op[0]); 333 break; 334 case ir_unop_cos: 335 case ir_unop_cos_reduced: 336 emit_math(SHADER_OPCODE_COS, this->result, op[0]); 337 break; 338 339 case ir_unop_dFdx: 340 emit(FS_OPCODE_DDX, this->result, op[0]); 341 break; 342 case ir_unop_dFdy: 343 emit(FS_OPCODE_DDY, this->result, op[0]); 344 break; 345 346 case ir_binop_add: 347 emit(BRW_OPCODE_ADD, this->result, op[0], op[1]); 348 break; 349 case ir_binop_sub: 350 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 351 break; 352 353 case ir_binop_mul: 354 if (ir->type->is_integer()) { 355 /* For integer multiplication, the MUL uses the low 16 bits 356 * of one of the operands (src0 on gen6, src1 on gen7). The 357 * MACH accumulates in the contribution of the upper 16 bits 358 * of that operand. 359 * 360 * FINISHME: Emit just the MUL if we know an operand is small 361 * enough. 362 */ 363 if (intel->gen >= 7 && c->dispatch_width == 16) 364 fail("16-wide explicit accumulator operands unsupported\n"); 365 366 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); 367 368 emit(BRW_OPCODE_MUL, acc, op[0], op[1]); 369 emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]); 370 emit(BRW_OPCODE_MOV, this->result, fs_reg(acc)); 371 } else { 372 emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); 373 } 374 break; 375 case ir_binop_div: 376 if (intel->gen >= 7 && c->dispatch_width == 16) 377 fail("16-wide INTDIV unsupported\n"); 378 379 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ 380 assert(ir->type->is_integer()); 381 emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); 382 break; 383 case ir_binop_mod: 384 if (intel->gen >= 7 && c->dispatch_width == 16) 385 fail("16-wide INTDIV unsupported\n"); 386 387 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ 388 assert(ir->type->is_integer()); 389 emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); 390 break; 391 392 case ir_binop_less: 393 case ir_binop_greater: 394 case ir_binop_lequal: 395 case ir_binop_gequal: 396 case ir_binop_equal: 397 case ir_binop_all_equal: 398 case ir_binop_nequal: 399 case ir_binop_any_nequal: 400 temp = this->result; 401 /* original gen4 does implicit conversion before comparison. */ 402 if (intel->gen < 5) 403 temp.type = op[0].type; 404 405 resolve_ud_negate(&op[0]); 406 resolve_ud_negate(&op[1]); 407 408 resolve_bool_comparison(ir->operands[0], &op[0]); 409 resolve_bool_comparison(ir->operands[1], &op[1]); 410 411 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 412 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 413 break; 414 415 case ir_binop_logic_xor: 416 emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); 417 break; 418 419 case ir_binop_logic_or: 420 emit(BRW_OPCODE_OR, this->result, op[0], op[1]); 421 break; 422 423 case ir_binop_logic_and: 424 emit(BRW_OPCODE_AND, this->result, op[0], op[1]); 425 break; 426 427 case ir_binop_dot: 428 case ir_unop_any: 429 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 430 break; 431 432 case ir_unop_noise: 433 assert(!"not reached: should be handled by lower_noise"); 434 break; 435 436 case ir_quadop_vector: 437 assert(!"not reached: should be handled by lower_quadop_vector"); 438 break; 439 440 case ir_unop_sqrt: 441 emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); 442 break; 443 444 case ir_unop_rsq: 445 emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); 446 break; 447 448 case ir_unop_bitcast_i2f: 449 case ir_unop_bitcast_u2f: 450 op[0].type = BRW_REGISTER_TYPE_F; 451 this->result = op[0]; 452 break; 453 case ir_unop_i2u: 454 case ir_unop_bitcast_f2u: 455 op[0].type = BRW_REGISTER_TYPE_UD; 456 this->result = op[0]; 457 break; 458 case ir_unop_u2i: 459 case ir_unop_bitcast_f2i: 460 op[0].type = BRW_REGISTER_TYPE_D; 461 this->result = op[0]; 462 break; 463 case ir_unop_i2f: 464 case ir_unop_u2f: 465 case ir_unop_f2i: 466 case ir_unop_f2u: 467 emit(BRW_OPCODE_MOV, this->result, op[0]); 468 break; 469 470 case ir_unop_b2i: 471 inst = emit(BRW_OPCODE_AND, this->result, op[0], fs_reg(1)); 472 break; 473 case ir_unop_b2f: 474 temp = fs_reg(this, glsl_type::int_type); 475 emit(BRW_OPCODE_AND, temp, op[0], fs_reg(1)); 476 emit(BRW_OPCODE_MOV, this->result, temp); 477 break; 478 479 case ir_unop_f2b: 480 inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)); 481 inst->conditional_mod = BRW_CONDITIONAL_NZ; 482 emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); 483 break; 484 case ir_unop_i2b: 485 assert(op[0].type == BRW_REGISTER_TYPE_D); 486 487 inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0)); 488 inst->conditional_mod = BRW_CONDITIONAL_NZ; 489 emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); 490 break; 491 492 case ir_unop_trunc: 493 emit(BRW_OPCODE_RNDZ, this->result, op[0]); 494 break; 495 case ir_unop_ceil: 496 op[0].negate = !op[0].negate; 497 inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); 498 this->result.negate = true; 499 break; 500 case ir_unop_floor: 501 inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); 502 break; 503 case ir_unop_fract: 504 inst = emit(BRW_OPCODE_FRC, this->result, op[0]); 505 break; 506 case ir_unop_round_even: 507 emit(BRW_OPCODE_RNDE, this->result, op[0]); 508 break; 509 510 case ir_binop_min: 511 resolve_ud_negate(&op[0]); 512 resolve_ud_negate(&op[1]); 513 514 if (intel->gen >= 6) { 515 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 516 inst->conditional_mod = BRW_CONDITIONAL_L; 517 } else { 518 /* Unalias the destination */ 519 this->result = fs_reg(this, ir->type); 520 521 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); 522 inst->conditional_mod = BRW_CONDITIONAL_L; 523 524 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 525 inst->predicated = true; 526 } 527 break; 528 case ir_binop_max: 529 resolve_ud_negate(&op[0]); 530 resolve_ud_negate(&op[1]); 531 532 if (intel->gen >= 6) { 533 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 534 inst->conditional_mod = BRW_CONDITIONAL_GE; 535 } else { 536 /* Unalias the destination */ 537 this->result = fs_reg(this, ir->type); 538 539 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); 540 inst->conditional_mod = BRW_CONDITIONAL_G; 541 542 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 543 inst->predicated = true; 544 } 545 break; 546 547 case ir_binop_pow: 548 emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); 549 break; 550 551 case ir_unop_bit_not: 552 inst = emit(BRW_OPCODE_NOT, this->result, op[0]); 553 break; 554 case ir_binop_bit_and: 555 inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]); 556 break; 557 case ir_binop_bit_xor: 558 inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); 559 break; 560 case ir_binop_bit_or: 561 inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]); 562 break; 563 564 case ir_binop_lshift: 565 inst = emit(BRW_OPCODE_SHL, this->result, op[0], op[1]); 566 break; 567 568 case ir_binop_rshift: 569 if (ir->type->base_type == GLSL_TYPE_INT) 570 inst = emit(BRW_OPCODE_ASR, this->result, op[0], op[1]); 571 else 572 inst = emit(BRW_OPCODE_SHR, this->result, op[0], op[1]); 573 break; 574 } 575} 576 577void 578fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 579 const glsl_type *type, bool predicated) 580{ 581 switch (type->base_type) { 582 case GLSL_TYPE_FLOAT: 583 case GLSL_TYPE_UINT: 584 case GLSL_TYPE_INT: 585 case GLSL_TYPE_BOOL: 586 for (unsigned int i = 0; i < type->components(); i++) { 587 l.type = brw_type_for_base_type(type); 588 r.type = brw_type_for_base_type(type); 589 590 if (predicated || !l.equals(r)) { 591 fs_inst *inst = emit(BRW_OPCODE_MOV, l, r); 592 inst->predicated = predicated; 593 } 594 595 l.reg_offset++; 596 r.reg_offset++; 597 } 598 break; 599 case GLSL_TYPE_ARRAY: 600 for (unsigned int i = 0; i < type->length; i++) { 601 emit_assignment_writes(l, r, type->fields.array, predicated); 602 } 603 break; 604 605 case GLSL_TYPE_STRUCT: 606 for (unsigned int i = 0; i < type->length; i++) { 607 emit_assignment_writes(l, r, type->fields.structure[i].type, 608 predicated); 609 } 610 break; 611 612 case GLSL_TYPE_SAMPLER: 613 break; 614 615 default: 616 assert(!"not reached"); 617 break; 618 } 619} 620 621/* If the RHS processing resulted in an instruction generating a 622 * temporary value, and it would be easy to rewrite the instruction to 623 * generate its result right into the LHS instead, do so. This ends 624 * up reliably removing instructions where it can be tricky to do so 625 * later without real UD chain information. 626 */ 627bool 628fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, 629 fs_reg dst, 630 fs_reg src, 631 fs_inst *pre_rhs_inst, 632 fs_inst *last_rhs_inst) 633{ 634 /* Only attempt if we're doing a direct assignment. */ 635 if (ir->condition || 636 !(ir->lhs->type->is_scalar() || 637 (ir->lhs->type->is_vector() && 638 ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) 639 return false; 640 641 /* Make sure the last instruction generated our source reg. */ 642 fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, 643 last_rhs_inst, 644 src); 645 if (!modify) 646 return false; 647 648 /* If last_rhs_inst wrote a different number of components than our LHS, 649 * we can't safely rewrite it. 650 */ 651 if (ir->lhs->type->vector_elements != modify->regs_written()) 652 return false; 653 654 /* Success! Rewrite the instruction. */ 655 modify->dst = dst; 656 657 return true; 658} 659 660void 661fs_visitor::visit(ir_assignment *ir) 662{ 663 fs_reg l, r; 664 fs_inst *inst; 665 666 /* FINISHME: arrays on the lhs */ 667 ir->lhs->accept(this); 668 l = this->result; 669 670 fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail(); 671 672 ir->rhs->accept(this); 673 r = this->result; 674 675 fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail(); 676 677 assert(l.file != BAD_FILE); 678 assert(r.file != BAD_FILE); 679 680 if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) 681 return; 682 683 if (ir->condition) { 684 emit_bool_to_cond_code(ir->condition); 685 } 686 687 if (ir->lhs->type->is_scalar() || 688 ir->lhs->type->is_vector()) { 689 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 690 if (ir->write_mask & (1 << i)) { 691 inst = emit(BRW_OPCODE_MOV, l, r); 692 if (ir->condition) 693 inst->predicated = true; 694 r.reg_offset++; 695 } 696 l.reg_offset++; 697 } 698 } else { 699 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 700 } 701} 702 703fs_inst * 704fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 705 int sampler) 706{ 707 int mlen; 708 int base_mrf = 1; 709 bool simd16 = false; 710 fs_reg orig_dst; 711 712 /* g0 header. */ 713 mlen = 1; 714 715 if (ir->shadow_comparitor) { 716 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 717 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 718 coordinate.reg_offset++; 719 } 720 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 721 mlen += 3; 722 723 if (ir->op == ir_tex) { 724 /* There's no plain shadow compare message, so we use shadow 725 * compare with a bias of 0.0. 726 */ 727 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); 728 mlen++; 729 } else if (ir->op == ir_txb) { 730 ir->lod_info.bias->accept(this); 731 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 732 mlen++; 733 } else { 734 assert(ir->op == ir_txl); 735 ir->lod_info.lod->accept(this); 736 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 737 mlen++; 738 } 739 740 ir->shadow_comparitor->accept(this); 741 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 742 mlen++; 743 } else if (ir->op == ir_tex) { 744 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 745 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 746 coordinate.reg_offset++; 747 } 748 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 749 mlen += 3; 750 } else if (ir->op == ir_txd) { 751 ir->lod_info.grad.dPdx->accept(this); 752 fs_reg dPdx = this->result; 753 754 ir->lod_info.grad.dPdy->accept(this); 755 fs_reg dPdy = this->result; 756 757 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 758 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 759 coordinate.reg_offset++; 760 } 761 /* the slots for u and v are always present, but r is optional */ 762 mlen += MAX2(ir->coordinate->type->vector_elements, 2); 763 764 /* P = u, v, r 765 * dPdx = dudx, dvdx, drdx 766 * dPdy = dudy, dvdy, drdy 767 * 768 * 1-arg: Does not exist. 769 * 770 * 2-arg: dudx dvdx dudy dvdy 771 * dPdx.x dPdx.y dPdy.x dPdy.y 772 * m4 m5 m6 m7 773 * 774 * 3-arg: dudx dvdx drdx dudy dvdy drdy 775 * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z 776 * m5 m6 m7 m8 m9 m10 777 */ 778 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { 779 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 780 dPdx.reg_offset++; 781 } 782 mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); 783 784 for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { 785 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 786 dPdy.reg_offset++; 787 } 788 mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); 789 } else if (ir->op == ir_txs) { 790 /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ 791 simd16 = true; 792 ir->lod_info.lod->accept(this); 793 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); 794 mlen += 2; 795 } else { 796 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 797 * instructions. We'll need to do SIMD16 here. 798 */ 799 simd16 = true; 800 assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf); 801 802 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 803 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), 804 coordinate); 805 coordinate.reg_offset++; 806 } 807 808 /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to 809 * be necessary for TXF (ld), but seems wise to do for all messages. 810 */ 811 for (int i = ir->coordinate->type->vector_elements; i < 3; i++) { 812 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)); 813 } 814 815 /* lod/bias appears after u/v/r. */ 816 mlen += 6; 817 818 if (ir->op == ir_txb) { 819 ir->lod_info.bias->accept(this); 820 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 821 mlen++; 822 } else { 823 ir->lod_info.lod->accept(this); 824 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, this->result.type), 825 this->result); 826 mlen++; 827 } 828 829 /* The unused upper half. */ 830 mlen++; 831 } 832 833 if (simd16) { 834 /* Now, since we're doing simd16, the return is 2 interleaved 835 * vec4s where the odd-indexed ones are junk. We'll need to move 836 * this weirdness around to the expected layout. 837 */ 838 orig_dst = dst; 839 const glsl_type *vec_type = 840 glsl_type::get_instance(ir->type->base_type, 4, 1); 841 dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2)); 842 dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type) 843 : BRW_REGISTER_TYPE_F; 844 } 845 846 fs_inst *inst = NULL; 847 switch (ir->op) { 848 case ir_tex: 849 inst = emit(SHADER_OPCODE_TEX, dst); 850 break; 851 case ir_txb: 852 inst = emit(FS_OPCODE_TXB, dst); 853 break; 854 case ir_txl: 855 inst = emit(SHADER_OPCODE_TXL, dst); 856 break; 857 case ir_txd: 858 inst = emit(SHADER_OPCODE_TXD, dst); 859 break; 860 case ir_txs: 861 inst = emit(SHADER_OPCODE_TXS, dst); 862 break; 863 case ir_txf: 864 inst = emit(SHADER_OPCODE_TXF, dst); 865 break; 866 } 867 inst->base_mrf = base_mrf; 868 inst->mlen = mlen; 869 inst->header_present = true; 870 871 if (simd16) { 872 for (int i = 0; i < 4; i++) { 873 emit(BRW_OPCODE_MOV, orig_dst, dst); 874 orig_dst.reg_offset++; 875 dst.reg_offset += 2; 876 } 877 } 878 879 return inst; 880} 881 882/* gen5's sampler has slots for u, v, r, array index, then optional 883 * parameters like shadow comparitor or LOD bias. If optional 884 * parameters aren't present, those base slots are optional and don't 885 * need to be included in the message. 886 * 887 * We don't fill in the unnecessary slots regardless, which may look 888 * surprising in the disassembly. 889 */ 890fs_inst * 891fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 892 int sampler) 893{ 894 int mlen = 0; 895 int base_mrf = 2; 896 int reg_width = c->dispatch_width / 8; 897 bool header_present = false; 898 const int vector_elements = 899 ir->coordinate ? ir->coordinate->type->vector_elements : 0; 900 901 if (ir->offset != NULL && ir->op == ir_txf) { 902 /* It appears that the ld instruction used for txf does its 903 * address bounds check before adding in the offset. To work 904 * around this, just add the integer offset to the integer texel 905 * coordinate, and don't put the offset in the header. 906 */ 907 ir_constant *offset = ir->offset->as_constant(); 908 for (int i = 0; i < vector_elements; i++) { 909 emit(BRW_OPCODE_ADD, 910 fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), 911 coordinate, 912 offset->value.i[i]); 913 coordinate.reg_offset++; 914 } 915 } else { 916 if (ir->offset) { 917 /* The offsets set up by the ir_texture visitor are in the 918 * m1 header, so we can't go headerless. 919 */ 920 header_present = true; 921 mlen++; 922 base_mrf--; 923 } 924 925 for (int i = 0; i < vector_elements; i++) { 926 emit(BRW_OPCODE_MOV, 927 fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), 928 coordinate); 929 coordinate.reg_offset++; 930 } 931 } 932 mlen += vector_elements * reg_width; 933 934 if (ir->shadow_comparitor) { 935 mlen = MAX2(mlen, header_present + 4 * reg_width); 936 937 ir->shadow_comparitor->accept(this); 938 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 939 mlen += reg_width; 940 } 941 942 fs_inst *inst = NULL; 943 switch (ir->op) { 944 case ir_tex: 945 inst = emit(SHADER_OPCODE_TEX, dst); 946 break; 947 case ir_txb: 948 ir->lod_info.bias->accept(this); 949 mlen = MAX2(mlen, header_present + 4 * reg_width); 950 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 951 mlen += reg_width; 952 953 inst = emit(FS_OPCODE_TXB, dst); 954 955 break; 956 case ir_txl: 957 ir->lod_info.lod->accept(this); 958 mlen = MAX2(mlen, header_present + 4 * reg_width); 959 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 960 mlen += reg_width; 961 962 inst = emit(SHADER_OPCODE_TXL, dst); 963 break; 964 case ir_txd: { 965 ir->lod_info.grad.dPdx->accept(this); 966 fs_reg dPdx = this->result; 967 968 ir->lod_info.grad.dPdy->accept(this); 969 fs_reg dPdy = this->result; 970 971 mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */ 972 973 /** 974 * P = u, v, r 975 * dPdx = dudx, dvdx, drdx 976 * dPdy = dudy, dvdy, drdy 977 * 978 * Load up these values: 979 * - dudx dudy dvdx dvdy drdx drdy 980 * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z 981 */ 982 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { 983 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 984 dPdx.reg_offset++; 985 mlen += reg_width; 986 987 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 988 dPdy.reg_offset++; 989 mlen += reg_width; 990 } 991 992 inst = emit(SHADER_OPCODE_TXD, dst); 993 break; 994 } 995 case ir_txs: 996 ir->lod_info.lod->accept(this); 997 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); 998 mlen += reg_width; 999 inst = emit(SHADER_OPCODE_TXS, dst); 1000 break; 1001 case ir_txf: 1002 mlen = header_present + 4 * reg_width; 1003 1004 ir->lod_info.lod->accept(this); 1005 emit(BRW_OPCODE_MOV, 1006 fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), 1007 this->result); 1008 inst = emit(SHADER_OPCODE_TXF, dst); 1009 break; 1010 } 1011 inst->base_mrf = base_mrf; 1012 inst->mlen = mlen; 1013 inst->header_present = header_present; 1014 1015 if (mlen > 11) { 1016 fail("Message length >11 disallowed by hardware\n"); 1017 } 1018 1019 return inst; 1020} 1021 1022fs_inst * 1023fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 1024 int sampler) 1025{ 1026 int mlen = 0; 1027 int base_mrf = 2; 1028 int reg_width = c->dispatch_width / 8; 1029 bool header_present = false; 1030 int offsets[3]; 1031 1032 if (ir->offset && ir->op != ir_txf) { 1033 /* The offsets set up by the ir_texture visitor are in the 1034 * m1 header, so we can't go headerless. 1035 */ 1036 header_present = true; 1037 mlen++; 1038 base_mrf--; 1039 } 1040 1041 if (ir->shadow_comparitor) { 1042 ir->shadow_comparitor->accept(this); 1043 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 1044 mlen += reg_width; 1045 } 1046 1047 /* Set up the LOD info */ 1048 switch (ir->op) { 1049 case ir_tex: 1050 break; 1051 case ir_txb: 1052 ir->lod_info.bias->accept(this); 1053 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 1054 mlen += reg_width; 1055 break; 1056 case ir_txl: 1057 ir->lod_info.lod->accept(this); 1058 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 1059 mlen += reg_width; 1060 break; 1061 case ir_txd: { 1062 if (c->dispatch_width == 16) 1063 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); 1064 1065 ir->lod_info.grad.dPdx->accept(this); 1066 fs_reg dPdx = this->result; 1067 1068 ir->lod_info.grad.dPdy->accept(this); 1069 fs_reg dPdy = this->result; 1070 1071 /* Load dPdx and the coordinate together: 1072 * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z 1073 */ 1074 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1075 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); 1076 coordinate.reg_offset++; 1077 mlen += reg_width; 1078 1079 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 1080 dPdx.reg_offset++; 1081 mlen += reg_width; 1082 1083 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 1084 dPdy.reg_offset++; 1085 mlen += reg_width; 1086 } 1087 break; 1088 } 1089 case ir_txs: 1090 ir->lod_info.lod->accept(this); 1091 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); 1092 mlen += reg_width; 1093 break; 1094 case ir_txf: 1095 /* It appears that the ld instruction used for txf does its 1096 * address bounds check before adding in the offset. To work 1097 * around this, just add the integer offset to the integer texel 1098 * coordinate, and don't put the offset in the header. 1099 */ 1100 if (ir->offset) { 1101 ir_constant *offset = ir->offset->as_constant(); 1102 offsets[0] = offset->value.i[0]; 1103 offsets[1] = offset->value.i[1]; 1104 offsets[2] = offset->value.i[2]; 1105 } else { 1106 memset(offsets, 0, sizeof(offsets)); 1107 } 1108 1109 /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */ 1110 emit(BRW_OPCODE_ADD, 1111 fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[0]); 1112 coordinate.reg_offset++; 1113 mlen += reg_width; 1114 1115 ir->lod_info.lod->accept(this); 1116 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), this->result); 1117 mlen += reg_width; 1118 1119 for (int i = 1; i < ir->coordinate->type->vector_elements; i++) { 1120 emit(BRW_OPCODE_ADD, 1121 fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[i]); 1122 coordinate.reg_offset++; 1123 mlen += reg_width; 1124 } 1125 break; 1126 } 1127 1128 /* Set up the coordinate (except for cases where it was done above) */ 1129 if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf) { 1130 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1131 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); 1132 coordinate.reg_offset++; 1133 mlen += reg_width; 1134 } 1135 } 1136 1137 /* Generate the SEND */ 1138 fs_inst *inst = NULL; 1139 switch (ir->op) { 1140 case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break; 1141 case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break; 1142 case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break; 1143 case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break; 1144 case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break; 1145 case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break; 1146 } 1147 inst->base_mrf = base_mrf; 1148 inst->mlen = mlen; 1149 inst->header_present = header_present; 1150 1151 if (mlen > 11) { 1152 fail("Message length >11 disallowed by hardware\n"); 1153 } 1154 1155 return inst; 1156} 1157 1158void 1159fs_visitor::visit(ir_texture *ir) 1160{ 1161 fs_inst *inst = NULL; 1162 1163 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &fp->Base); 1164 sampler = fp->Base.SamplerUnits[sampler]; 1165 1166 if (ir->coordinate) 1167 ir->coordinate->accept(this); 1168 fs_reg coordinate = this->result; 1169 1170 if (ir->offset != NULL && !(intel->gen == 7 && ir->op == ir_txf)) { 1171 uint32_t offset_bits = brw_texture_offset(ir->offset->as_constant()); 1172 1173 /* Explicitly set up the message header by copying g0 to msg reg m1. */ 1174 emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), 1175 fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD))); 1176 1177 /* Then set the offset bits in DWord 2 of the message header. */ 1178 emit(BRW_OPCODE_MOV, 1179 fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), 1180 BRW_REGISTER_TYPE_UD)), 1181 fs_reg(brw_imm_uw(offset_bits))); 1182 } 1183 1184 /* Should be lowered by do_lower_texture_projection */ 1185 assert(!ir->projector); 1186 1187 bool needs_gl_clamp = true; 1188 1189 fs_reg scale_x, scale_y; 1190 1191 /* The 965 requires the EU to do the normalization of GL rectangle 1192 * texture coordinates. We use the program parameter state 1193 * tracking to get the scaling factor. 1194 */ 1195 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT && 1196 (intel->gen < 6 || 1197 (intel->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) || 1198 c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) { 1199 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1200 int tokens[STATE_LENGTH] = { 1201 STATE_INTERNAL, 1202 STATE_TEXRECT_SCALE, 1203 sampler, 1204 0, 1205 0 1206 }; 1207 1208 if (c->dispatch_width == 16) { 1209 fail("rectangle scale uniform setup not supported on 16-wide\n"); 1210 this->result = fs_reg(this, ir->type); 1211 return; 1212 } 1213 1214 scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1215 scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1216 1217 GLuint index = _mesa_add_state_reference(params, 1218 (gl_state_index *)tokens); 1219 1220 this->param_index[c->prog_data.nr_params] = index; 1221 this->param_offset[c->prog_data.nr_params] = 0; 1222 c->prog_data.nr_params++; 1223 this->param_index[c->prog_data.nr_params] = index; 1224 this->param_offset[c->prog_data.nr_params] = 1; 1225 c->prog_data.nr_params++; 1226 } 1227 1228 /* The 965 requires the EU to do the normalization of GL rectangle 1229 * texture coordinates. We use the program parameter state 1230 * tracking to get the scaling factor. 1231 */ 1232 if (intel->gen < 6 && 1233 ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1234 fs_reg dst = fs_reg(this, ir->coordinate->type); 1235 fs_reg src = coordinate; 1236 coordinate = dst; 1237 1238 emit(BRW_OPCODE_MUL, dst, src, scale_x); 1239 dst.reg_offset++; 1240 src.reg_offset++; 1241 emit(BRW_OPCODE_MUL, dst, src, scale_y); 1242 } else if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1243 /* On gen6+, the sampler handles the rectangle coordinates 1244 * natively, without needing rescaling. But that means we have 1245 * to do GL_CLAMP clamping at the [0, width], [0, height] scale, 1246 * not [0, 1] like the default case below. 1247 */ 1248 needs_gl_clamp = false; 1249 1250 for (int i = 0; i < 2; i++) { 1251 if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) { 1252 fs_reg chan = coordinate; 1253 chan.reg_offset += i; 1254 1255 inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0)); 1256 inst->conditional_mod = BRW_CONDITIONAL_G; 1257 1258 /* Our parameter comes in as 1.0/width or 1.0/height, 1259 * because that's what people normally want for doing 1260 * texture rectangle handling. We need width or height 1261 * for clamping, but we don't care enough to make a new 1262 * parameter type, so just invert back. 1263 */ 1264 fs_reg limit = fs_reg(this, glsl_type::float_type); 1265 emit(BRW_OPCODE_MOV, limit, i == 0 ? scale_x : scale_y); 1266 emit(SHADER_OPCODE_RCP, limit, limit); 1267 1268 inst = emit(BRW_OPCODE_SEL, chan, chan, limit); 1269 inst->conditional_mod = BRW_CONDITIONAL_L; 1270 } 1271 } 1272 } 1273 1274 if (ir->coordinate && needs_gl_clamp) { 1275 for (unsigned int i = 0; 1276 i < MIN2(ir->coordinate->type->vector_elements, 3); i++) { 1277 if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) { 1278 fs_reg chan = coordinate; 1279 chan.reg_offset += i; 1280 1281 fs_inst *inst = emit(BRW_OPCODE_MOV, chan, chan); 1282 inst->saturate = true; 1283 } 1284 } 1285 } 1286 1287 /* Writemasking doesn't eliminate channels on SIMD8 texture 1288 * samples, so don't worry about them. 1289 */ 1290 fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1)); 1291 1292 if (intel->gen >= 7) { 1293 inst = emit_texture_gen7(ir, dst, coordinate, sampler); 1294 } else if (intel->gen >= 5) { 1295 inst = emit_texture_gen5(ir, dst, coordinate, sampler); 1296 } else { 1297 inst = emit_texture_gen4(ir, dst, coordinate, sampler); 1298 } 1299 1300 /* If there's an offset, we already set up m1. To avoid the implied move, 1301 * use the null register. Otherwise, we want an implied move from g0. 1302 */ 1303 if (ir->offset != NULL || !inst->header_present) 1304 inst->src[0] = reg_undef; 1305 else 1306 inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 1307 1308 inst->sampler = sampler; 1309 1310 if (ir->shadow_comparitor) 1311 inst->shadow_compare = true; 1312 1313 swizzle_result(ir, dst, sampler); 1314} 1315 1316/** 1317 * Swizzle the result of a texture result. This is necessary for 1318 * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. 1319 */ 1320void 1321fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) 1322{ 1323 this->result = orig_val; 1324 1325 if (ir->op == ir_txs) 1326 return; 1327 1328 if (ir->type == glsl_type::float_type) { 1329 /* Ignore DEPTH_TEXTURE_MODE swizzling. */ 1330 assert(ir->sampler->type->sampler_shadow); 1331 } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) { 1332 fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type); 1333 1334 for (int i = 0; i < 4; i++) { 1335 int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i); 1336 fs_reg l = swizzled_result; 1337 l.reg_offset += i; 1338 1339 if (swiz == SWIZZLE_ZERO) { 1340 emit(BRW_OPCODE_MOV, l, fs_reg(0.0f)); 1341 } else if (swiz == SWIZZLE_ONE) { 1342 emit(BRW_OPCODE_MOV, l, fs_reg(1.0f)); 1343 } else { 1344 fs_reg r = orig_val; 1345 r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i); 1346 emit(BRW_OPCODE_MOV, l, r); 1347 } 1348 } 1349 this->result = swizzled_result; 1350 } 1351} 1352 1353void 1354fs_visitor::visit(ir_swizzle *ir) 1355{ 1356 ir->val->accept(this); 1357 fs_reg val = this->result; 1358 1359 if (ir->type->vector_elements == 1) { 1360 this->result.reg_offset += ir->mask.x; 1361 return; 1362 } 1363 1364 fs_reg result = fs_reg(this, ir->type); 1365 this->result = result; 1366 1367 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1368 fs_reg channel = val; 1369 int swiz = 0; 1370 1371 switch (i) { 1372 case 0: 1373 swiz = ir->mask.x; 1374 break; 1375 case 1: 1376 swiz = ir->mask.y; 1377 break; 1378 case 2: 1379 swiz = ir->mask.z; 1380 break; 1381 case 3: 1382 swiz = ir->mask.w; 1383 break; 1384 } 1385 1386 channel.reg_offset += swiz; 1387 emit(BRW_OPCODE_MOV, result, channel); 1388 result.reg_offset++; 1389 } 1390} 1391 1392void 1393fs_visitor::visit(ir_discard *ir) 1394{ 1395 assert(ir->condition == NULL); /* FINISHME */ 1396 1397 emit(FS_OPCODE_DISCARD); 1398} 1399 1400void 1401fs_visitor::visit(ir_constant *ir) 1402{ 1403 /* Set this->result to reg at the bottom of the function because some code 1404 * paths will cause this visitor to be applied to other fields. This will 1405 * cause the value stored in this->result to be modified. 1406 * 1407 * Make reg constant so that it doesn't get accidentally modified along the 1408 * way. Yes, I actually had this problem. :( 1409 */ 1410 const fs_reg reg(this, ir->type); 1411 fs_reg dst_reg = reg; 1412 1413 if (ir->type->is_array()) { 1414 const unsigned size = type_size(ir->type->fields.array); 1415 1416 for (unsigned i = 0; i < ir->type->length; i++) { 1417 ir->array_elements[i]->accept(this); 1418 fs_reg src_reg = this->result; 1419 1420 dst_reg.type = src_reg.type; 1421 for (unsigned j = 0; j < size; j++) { 1422 emit(BRW_OPCODE_MOV, dst_reg, src_reg); 1423 src_reg.reg_offset++; 1424 dst_reg.reg_offset++; 1425 } 1426 } 1427 } else if (ir->type->is_record()) { 1428 foreach_list(node, &ir->components) { 1429 ir_constant *const field = (ir_constant *) node; 1430 const unsigned size = type_size(field->type); 1431 1432 field->accept(this); 1433 fs_reg src_reg = this->result; 1434 1435 dst_reg.type = src_reg.type; 1436 for (unsigned j = 0; j < size; j++) { 1437 emit(BRW_OPCODE_MOV, dst_reg, src_reg); 1438 src_reg.reg_offset++; 1439 dst_reg.reg_offset++; 1440 } 1441 } 1442 } else { 1443 const unsigned size = type_size(ir->type); 1444 1445 for (unsigned i = 0; i < size; i++) { 1446 switch (ir->type->base_type) { 1447 case GLSL_TYPE_FLOAT: 1448 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])); 1449 break; 1450 case GLSL_TYPE_UINT: 1451 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])); 1452 break; 1453 case GLSL_TYPE_INT: 1454 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])); 1455 break; 1456 case GLSL_TYPE_BOOL: 1457 emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])); 1458 break; 1459 default: 1460 assert(!"Non-float/uint/int/bool constant"); 1461 } 1462 dst_reg.reg_offset++; 1463 } 1464 } 1465 1466 this->result = reg; 1467} 1468 1469void 1470fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 1471{ 1472 ir_expression *expr = ir->as_expression(); 1473 1474 if (expr) { 1475 fs_reg op[2]; 1476 fs_inst *inst; 1477 1478 assert(expr->get_num_operands() <= 2); 1479 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1480 assert(expr->operands[i]->type->is_scalar()); 1481 1482 expr->operands[i]->accept(this); 1483 op[i] = this->result; 1484 1485 resolve_ud_negate(&op[i]); 1486 } 1487 1488 switch (expr->operation) { 1489 case ir_unop_logic_not: 1490 inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)); 1491 inst->conditional_mod = BRW_CONDITIONAL_Z; 1492 break; 1493 1494 case ir_binop_logic_xor: 1495 case ir_binop_logic_or: 1496 case ir_binop_logic_and: 1497 goto out; 1498 1499 case ir_unop_f2b: 1500 if (intel->gen >= 6) { 1501 inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f)); 1502 } else { 1503 inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]); 1504 } 1505 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1506 break; 1507 1508 case ir_unop_i2b: 1509 if (intel->gen >= 6) { 1510 inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)); 1511 } else { 1512 inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]); 1513 } 1514 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1515 break; 1516 1517 case ir_binop_greater: 1518 case ir_binop_gequal: 1519 case ir_binop_less: 1520 case ir_binop_lequal: 1521 case ir_binop_equal: 1522 case ir_binop_all_equal: 1523 case ir_binop_nequal: 1524 case ir_binop_any_nequal: 1525 resolve_bool_comparison(expr->operands[0], &op[0]); 1526 resolve_bool_comparison(expr->operands[1], &op[1]); 1527 1528 inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]); 1529 inst->conditional_mod = 1530 brw_conditional_for_comparison(expr->operation); 1531 break; 1532 1533 default: 1534 assert(!"not reached"); 1535 fail("bad cond code\n"); 1536 break; 1537 } 1538 return; 1539 } 1540 1541out: 1542 ir->accept(this); 1543 1544 fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1)); 1545 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1546} 1547 1548/** 1549 * Emit a gen6 IF statement with the comparison folded into the IF 1550 * instruction. 1551 */ 1552void 1553fs_visitor::emit_if_gen6(ir_if *ir) 1554{ 1555 ir_expression *expr = ir->condition->as_expression(); 1556 1557 if (expr) { 1558 fs_reg op[2]; 1559 fs_inst *inst; 1560 fs_reg temp; 1561 1562 assert(expr->get_num_operands() <= 2); 1563 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1564 assert(expr->operands[i]->type->is_scalar()); 1565 1566 expr->operands[i]->accept(this); 1567 op[i] = this->result; 1568 } 1569 1570 switch (expr->operation) { 1571 case ir_unop_logic_not: 1572 inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0)); 1573 inst->conditional_mod = BRW_CONDITIONAL_Z; 1574 return; 1575 1576 case ir_binop_logic_xor: 1577 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); 1578 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1579 return; 1580 1581 case ir_binop_logic_or: 1582 temp = fs_reg(this, glsl_type::bool_type); 1583 emit(BRW_OPCODE_OR, temp, op[0], op[1]); 1584 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); 1585 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1586 return; 1587 1588 case ir_binop_logic_and: 1589 temp = fs_reg(this, glsl_type::bool_type); 1590 emit(BRW_OPCODE_AND, temp, op[0], op[1]); 1591 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); 1592 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1593 return; 1594 1595 case ir_unop_f2b: 1596 inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); 1597 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1598 return; 1599 1600 case ir_unop_i2b: 1601 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); 1602 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1603 return; 1604 1605 case ir_binop_greater: 1606 case ir_binop_gequal: 1607 case ir_binop_less: 1608 case ir_binop_lequal: 1609 case ir_binop_equal: 1610 case ir_binop_all_equal: 1611 case ir_binop_nequal: 1612 case ir_binop_any_nequal: 1613 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); 1614 inst->conditional_mod = 1615 brw_conditional_for_comparison(expr->operation); 1616 return; 1617 default: 1618 assert(!"not reached"); 1619 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); 1620 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1621 fail("bad condition\n"); 1622 return; 1623 } 1624 return; 1625 } 1626 1627 ir->condition->accept(this); 1628 1629 fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)); 1630 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1631} 1632 1633void 1634fs_visitor::visit(ir_if *ir) 1635{ 1636 fs_inst *inst; 1637 1638 if (intel->gen < 6 && c->dispatch_width == 16) { 1639 fail("Can't support (non-uniform) control flow on 16-wide\n"); 1640 } 1641 1642 /* Don't point the annotation at the if statement, because then it plus 1643 * the then and else blocks get printed. 1644 */ 1645 this->base_ir = ir->condition; 1646 1647 if (intel->gen == 6) { 1648 emit_if_gen6(ir); 1649 } else { 1650 emit_bool_to_cond_code(ir->condition); 1651 1652 inst = emit(BRW_OPCODE_IF); 1653 inst->predicated = true; 1654 } 1655 1656 foreach_list(node, &ir->then_instructions) { 1657 ir_instruction *ir = (ir_instruction *)node; 1658 this->base_ir = ir; 1659 1660 ir->accept(this); 1661 } 1662 1663 if (!ir->else_instructions.is_empty()) { 1664 emit(BRW_OPCODE_ELSE); 1665 1666 foreach_list(node, &ir->else_instructions) { 1667 ir_instruction *ir = (ir_instruction *)node; 1668 this->base_ir = ir; 1669 1670 ir->accept(this); 1671 } 1672 } 1673 1674 emit(BRW_OPCODE_ENDIF); 1675} 1676 1677void 1678fs_visitor::visit(ir_loop *ir) 1679{ 1680 fs_reg counter = reg_undef; 1681 1682 if (intel->gen < 6 && c->dispatch_width == 16) { 1683 fail("Can't support (non-uniform) control flow on 16-wide\n"); 1684 } 1685 1686 if (ir->counter) { 1687 this->base_ir = ir->counter; 1688 ir->counter->accept(this); 1689 counter = *(variable_storage(ir->counter)); 1690 1691 if (ir->from) { 1692 this->base_ir = ir->from; 1693 ir->from->accept(this); 1694 1695 emit(BRW_OPCODE_MOV, counter, this->result); 1696 } 1697 } 1698 1699 this->base_ir = NULL; 1700 emit(BRW_OPCODE_DO); 1701 1702 if (ir->to) { 1703 this->base_ir = ir->to; 1704 ir->to->accept(this); 1705 1706 fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result); 1707 inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); 1708 1709 inst = emit(BRW_OPCODE_BREAK); 1710 inst->predicated = true; 1711 } 1712 1713 foreach_list(node, &ir->body_instructions) { 1714 ir_instruction *ir = (ir_instruction *)node; 1715 1716 this->base_ir = ir; 1717 ir->accept(this); 1718 } 1719 1720 if (ir->increment) { 1721 this->base_ir = ir->increment; 1722 ir->increment->accept(this); 1723 emit(BRW_OPCODE_ADD, counter, counter, this->result); 1724 } 1725 1726 this->base_ir = NULL; 1727 emit(BRW_OPCODE_WHILE); 1728} 1729 1730void 1731fs_visitor::visit(ir_loop_jump *ir) 1732{ 1733 switch (ir->mode) { 1734 case ir_loop_jump::jump_break: 1735 emit(BRW_OPCODE_BREAK); 1736 break; 1737 case ir_loop_jump::jump_continue: 1738 emit(BRW_OPCODE_CONTINUE); 1739 break; 1740 } 1741} 1742 1743void 1744fs_visitor::visit(ir_call *ir) 1745{ 1746 assert(!"FINISHME"); 1747} 1748 1749void 1750fs_visitor::visit(ir_return *ir) 1751{ 1752 assert(!"FINISHME"); 1753} 1754 1755void 1756fs_visitor::visit(ir_function *ir) 1757{ 1758 /* Ignore function bodies other than main() -- we shouldn't see calls to 1759 * them since they should all be inlined before we get to ir_to_mesa. 1760 */ 1761 if (strcmp(ir->name, "main") == 0) { 1762 const ir_function_signature *sig; 1763 exec_list empty; 1764 1765 sig = ir->matching_signature(&empty); 1766 1767 assert(sig); 1768 1769 foreach_list(node, &sig->body) { 1770 ir_instruction *ir = (ir_instruction *)node; 1771 this->base_ir = ir; 1772 1773 ir->accept(this); 1774 } 1775 } 1776} 1777 1778void 1779fs_visitor::visit(ir_function_signature *ir) 1780{ 1781 assert(!"not reached"); 1782 (void)ir; 1783} 1784 1785fs_inst * 1786fs_visitor::emit(fs_inst inst) 1787{ 1788 fs_inst *list_inst = new(mem_ctx) fs_inst; 1789 *list_inst = inst; 1790 1791 if (force_uncompressed_stack > 0) 1792 list_inst->force_uncompressed = true; 1793 else if (force_sechalf_stack > 0) 1794 list_inst->force_sechalf = true; 1795 1796 list_inst->annotation = this->current_annotation; 1797 list_inst->ir = this->base_ir; 1798 1799 this->instructions.push_tail(list_inst); 1800 1801 return list_inst; 1802} 1803 1804/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1805void 1806fs_visitor::emit_dummy_fs() 1807{ 1808 int reg_width = c->dispatch_width / 8; 1809 1810 /* Everyone's favorite color. */ 1811 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)); 1812 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f)); 1813 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f)); 1814 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f)); 1815 1816 fs_inst *write; 1817 write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); 1818 write->base_mrf = 2; 1819 write->mlen = 4 * reg_width; 1820 write->eot = true; 1821} 1822 1823/* The register location here is relative to the start of the URB 1824 * data. It will get adjusted to be a real location before 1825 * generate_code() time. 1826 */ 1827struct brw_reg 1828fs_visitor::interp_reg(int location, int channel) 1829{ 1830 int regnr = urb_setup[location] * 2 + channel / 2; 1831 int stride = (channel & 1) * 4; 1832 1833 assert(urb_setup[location] != -1); 1834 1835 return brw_vec1_grf(regnr, stride); 1836} 1837 1838/** Emits the interpolation for the varying inputs. */ 1839void 1840fs_visitor::emit_interpolation_setup_gen4() 1841{ 1842 this->current_annotation = "compute pixel centers"; 1843 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1844 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1845 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1846 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1847 1848 emit(FS_OPCODE_PIXEL_X, this->pixel_x); 1849 emit(FS_OPCODE_PIXEL_Y, this->pixel_y); 1850 1851 this->current_annotation = "compute pixel deltas from v0"; 1852 if (brw->has_pln) { 1853 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1854 fs_reg(this, glsl_type::vec2_type); 1855 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1856 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; 1857 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++; 1858 } else { 1859 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1860 fs_reg(this, glsl_type::float_type); 1861 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1862 fs_reg(this, glsl_type::float_type); 1863 } 1864 emit(BRW_OPCODE_ADD, this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1865 this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))); 1866 emit(BRW_OPCODE_ADD, this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1867 this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); 1868 1869 this->current_annotation = "compute pos.w and 1/pos.w"; 1870 /* Compute wpos.w. It's always in our setup, since it's needed to 1871 * interpolate the other attributes. 1872 */ 1873 this->wpos_w = fs_reg(this, glsl_type::float_type); 1874 emit(FS_OPCODE_LINTERP, wpos_w, 1875 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1876 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1877 interp_reg(FRAG_ATTRIB_WPOS, 3)); 1878 /* Compute the pixel 1/W value from wpos.w. */ 1879 this->pixel_w = fs_reg(this, glsl_type::float_type); 1880 emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); 1881 this->current_annotation = NULL; 1882} 1883 1884/** Emits the interpolation for the varying inputs. */ 1885void 1886fs_visitor::emit_interpolation_setup_gen6() 1887{ 1888 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1889 1890 /* If the pixel centers end up used, the setup is the same as for gen4. */ 1891 this->current_annotation = "compute pixel centers"; 1892 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); 1893 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); 1894 int_pixel_x.type = BRW_REGISTER_TYPE_UW; 1895 int_pixel_y.type = BRW_REGISTER_TYPE_UW; 1896 emit(BRW_OPCODE_ADD, 1897 int_pixel_x, 1898 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1899 fs_reg(brw_imm_v(0x10101010))); 1900 emit(BRW_OPCODE_ADD, 1901 int_pixel_y, 1902 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1903 fs_reg(brw_imm_v(0x11001100))); 1904 1905 /* As of gen6, we can no longer mix float and int sources. We have 1906 * to turn the integer pixel centers into floats for their actual 1907 * use. 1908 */ 1909 this->pixel_x = fs_reg(this, glsl_type::float_type); 1910 this->pixel_y = fs_reg(this, glsl_type::float_type); 1911 emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x); 1912 emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y); 1913 1914 this->current_annotation = "compute pos.w"; 1915 this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); 1916 this->wpos_w = fs_reg(this, glsl_type::float_type); 1917 emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); 1918 1919 for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) { 1920 uint8_t reg = c->barycentric_coord_reg[i]; 1921 this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0)); 1922 this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0)); 1923 } 1924 1925 this->current_annotation = NULL; 1926} 1927 1928void 1929fs_visitor::emit_color_write(int target, int index, int first_color_mrf) 1930{ 1931 int reg_width = c->dispatch_width / 8; 1932 fs_inst *inst; 1933 fs_reg color = outputs[target]; 1934 fs_reg mrf; 1935 1936 /* If there's no color data to be written, skip it. */ 1937 if (color.file == BAD_FILE) 1938 return; 1939 1940 color.reg_offset += index; 1941 1942 if (c->dispatch_width == 8 || intel->gen >= 6) { 1943 /* SIMD8 write looks like: 1944 * m + 0: r0 1945 * m + 1: r1 1946 * m + 2: g0 1947 * m + 3: g1 1948 * 1949 * gen6 SIMD16 DP write looks like: 1950 * m + 0: r0 1951 * m + 1: r1 1952 * m + 2: g0 1953 * m + 3: g1 1954 * m + 4: b0 1955 * m + 5: b1 1956 * m + 6: a0 1957 * m + 7: a1 1958 */ 1959 inst = emit(BRW_OPCODE_MOV, 1960 fs_reg(MRF, first_color_mrf + index * reg_width, color.type), 1961 color); 1962 inst->saturate = c->key.clamp_fragment_color; 1963 } else { 1964 /* pre-gen6 SIMD16 single source DP write looks like: 1965 * m + 0: r0 1966 * m + 1: g0 1967 * m + 2: b0 1968 * m + 3: a0 1969 * m + 4: r1 1970 * m + 5: g1 1971 * m + 6: b1 1972 * m + 7: a1 1973 */ 1974 if (brw->has_compr4) { 1975 /* By setting the high bit of the MRF register number, we 1976 * indicate that we want COMPR4 mode - instead of doing the 1977 * usual destination + 1 for the second half we get 1978 * destination + 4. 1979 */ 1980 inst = emit(BRW_OPCODE_MOV, 1981 fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index, 1982 color.type), 1983 color); 1984 inst->saturate = c->key.clamp_fragment_color; 1985 } else { 1986 push_force_uncompressed(); 1987 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index, 1988 color.type), 1989 color); 1990 inst->saturate = c->key.clamp_fragment_color; 1991 pop_force_uncompressed(); 1992 1993 push_force_sechalf(); 1994 color.sechalf = true; 1995 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4, 1996 color.type), 1997 color); 1998 inst->saturate = c->key.clamp_fragment_color; 1999 pop_force_sechalf(); 2000 color.sechalf = false; 2001 } 2002 } 2003} 2004 2005void 2006fs_visitor::emit_fb_writes() 2007{ 2008 this->current_annotation = "FB write header"; 2009 bool header_present = true; 2010 /* We can potentially have a message length of up to 15, so we have to set 2011 * base_mrf to either 0 or 1 in order to fit in m0..m15. 2012 */ 2013 int base_mrf = 1; 2014 int nr = base_mrf; 2015 int reg_width = c->dispatch_width / 8; 2016 bool do_dual_src = this->dual_src_output.file != BAD_FILE; 2017 2018 if (c->dispatch_width == 16 && do_dual_src) { 2019 fail("GL_ARB_blend_func_extended not yet supported in 16-wide."); 2020 do_dual_src = false; 2021 } 2022 2023 /* From the Sandy Bridge PRM, volume 4, page 198: 2024 * 2025 * "Dispatched Pixel Enables. One bit per pixel indicating 2026 * which pixels were originally enabled when the thread was 2027 * dispatched. This field is only required for the end-of- 2028 * thread message and on all dual-source messages." 2029 */ 2030 if (intel->gen >= 6 && 2031 !this->fp->UsesKill && 2032 !do_dual_src && 2033 c->key.nr_color_regions == 1) { 2034 header_present = false; 2035 } 2036 2037 if (header_present) { 2038 /* m2, m3 header */ 2039 nr += 2; 2040 } 2041 2042 if (c->aa_dest_stencil_reg) { 2043 push_force_uncompressed(); 2044 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2045 fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))); 2046 pop_force_uncompressed(); 2047 } 2048 2049 /* Reserve space for color. It'll be filled in per MRT below. */ 2050 int color_mrf = nr; 2051 nr += 4 * reg_width; 2052 if (do_dual_src) 2053 nr += 4; 2054 2055 if (c->source_depth_to_render_target) { 2056 if (intel->gen == 6 && c->dispatch_width == 16) { 2057 /* For outputting oDepth on gen6, SIMD8 writes have to be 2058 * used. This would require 8-wide moves of each half to 2059 * message regs, kind of like pre-gen5 SIMD16 FB writes. 2060 * Just bail on doing so for now. 2061 */ 2062 fail("Missing support for simd16 depth writes on gen6\n"); 2063 } 2064 2065 if (c->computes_depth) { 2066 /* Hand over gl_FragDepth. */ 2067 assert(this->frag_depth); 2068 fs_reg depth = *(variable_storage(this->frag_depth)); 2069 2070 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth); 2071 } else { 2072 /* Pass through the payload depth. */ 2073 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), 2074 fs_reg(brw_vec8_grf(c->source_depth_reg, 0))); 2075 } 2076 nr += reg_width; 2077 } 2078 2079 if (c->dest_depth_reg) { 2080 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), 2081 fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))); 2082 nr += reg_width; 2083 } 2084 2085 if (do_dual_src) { 2086 fs_reg src0 = this->outputs[0]; 2087 fs_reg src1 = this->dual_src_output; 2088 2089 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2090 "FB write src0"); 2091 for (int i = 0; i < 4; i++) { 2092 fs_inst *inst = emit(BRW_OPCODE_MOV, 2093 fs_reg(MRF, color_mrf + i, src0.type), 2094 src0); 2095 src0.reg_offset++; 2096 inst->saturate = c->key.clamp_fragment_color; 2097 } 2098 2099 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2100 "FB write src1"); 2101 for (int i = 0; i < 4; i++) { 2102 fs_inst *inst = emit(BRW_OPCODE_MOV, 2103 fs_reg(MRF, color_mrf + 4 + i, src1.type), 2104 src1); 2105 src1.reg_offset++; 2106 inst->saturate = c->key.clamp_fragment_color; 2107 } 2108 2109 fs_inst *inst = emit(FS_OPCODE_FB_WRITE); 2110 inst->target = 0; 2111 inst->base_mrf = base_mrf; 2112 inst->mlen = nr - base_mrf; 2113 inst->eot = true; 2114 inst->header_present = header_present; 2115 2116 c->prog_data.dual_src_blend = true; 2117 this->current_annotation = NULL; 2118 return; 2119 } 2120 2121 for (int target = 0; target < c->key.nr_color_regions; target++) { 2122 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2123 "FB write target %d", 2124 target); 2125 for (unsigned i = 0; i < this->output_components[target]; i++) 2126 emit_color_write(target, i, color_mrf); 2127 2128 fs_inst *inst = emit(FS_OPCODE_FB_WRITE); 2129 inst->target = target; 2130 inst->base_mrf = base_mrf; 2131 inst->mlen = nr - base_mrf; 2132 if (target == c->key.nr_color_regions - 1) 2133 inst->eot = true; 2134 inst->header_present = header_present; 2135 } 2136 2137 if (c->key.nr_color_regions == 0) { 2138 /* Even if there's no color buffers enabled, we still need to send 2139 * alpha out the pipeline to our null renderbuffer to support 2140 * alpha-testing, alpha-to-coverage, and so on. 2141 */ 2142 emit_color_write(0, 3, color_mrf); 2143 2144 fs_inst *inst = emit(FS_OPCODE_FB_WRITE); 2145 inst->base_mrf = base_mrf; 2146 inst->mlen = nr - base_mrf; 2147 inst->eot = true; 2148 inst->header_present = header_present; 2149 } 2150 2151 this->current_annotation = NULL; 2152} 2153 2154void 2155fs_visitor::resolve_ud_negate(fs_reg *reg) 2156{ 2157 if (reg->type != BRW_REGISTER_TYPE_UD || 2158 !reg->negate) 2159 return; 2160 2161 fs_reg temp = fs_reg(this, glsl_type::uint_type); 2162 emit(BRW_OPCODE_MOV, temp, *reg); 2163 *reg = temp; 2164} 2165 2166void 2167fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) 2168{ 2169 if (rvalue->type != glsl_type::bool_type) 2170 return; 2171 2172 fs_reg temp = fs_reg(this, glsl_type::bool_type); 2173 emit(BRW_OPCODE_AND, temp, *reg, fs_reg(1)); 2174 *reg = temp; 2175} 2176 2177fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, 2178 struct brw_shader *shader) 2179{ 2180 this->c = c; 2181 this->p = &c->func; 2182 this->brw = p->brw; 2183 this->fp = (struct gl_fragment_program *) 2184 prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; 2185 this->prog = prog; 2186 this->intel = &brw->intel; 2187 this->ctx = &intel->ctx; 2188 this->mem_ctx = ralloc_context(NULL); 2189 this->shader = shader; 2190 this->failed = false; 2191 this->variable_ht = hash_table_ctor(0, 2192 hash_table_pointer_hash, 2193 hash_table_pointer_compare); 2194 2195 /* There's a question that appears to be left open in the spec: 2196 * How do implicit dst conversions interact with the CMP 2197 * instruction or conditional mods? On gen6, the instruction: 2198 * 2199 * CMP null<d> src0<f> src1<f> 2200 * 2201 * will do src1 - src0 and compare that result as if it was an 2202 * integer. On gen4, it will do src1 - src0 as float, convert 2203 * the result to int, and compare as int. In between, it 2204 * appears that it does src1 - src0 and does the compare in the 2205 * execution type so dst type doesn't matter. 2206 */ 2207 if (this->intel->gen > 4) 2208 this->reg_null_cmp = reg_null_d; 2209 else 2210 this->reg_null_cmp = reg_null_f; 2211 2212 this->frag_depth = NULL; 2213 memset(this->outputs, 0, sizeof(this->outputs)); 2214 this->first_non_payload_grf = 0; 2215 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; 2216 2217 this->current_annotation = NULL; 2218 this->base_ir = NULL; 2219 2220 this->virtual_grf_sizes = NULL; 2221 this->virtual_grf_count = 0; 2222 this->virtual_grf_array_size = 0; 2223 this->virtual_grf_def = NULL; 2224 this->virtual_grf_use = NULL; 2225 this->live_intervals_valid = false; 2226 2227 this->force_uncompressed_stack = 0; 2228 this->force_sechalf_stack = 0; 2229} 2230 2231fs_visitor::~fs_visitor() 2232{ 2233 ralloc_free(this->mem_ctx); 2234 hash_table_dtor(this->variable_ht); 2235} 2236