brw_fs_visitor.cpp revision 11a7b93592c22c8165f8fde6395f76778fca452e
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_fs_visitor.cpp
 *
 * This file supports generating the FS LIR from the GLSL IR.  The LIR
 * makes it easier to do backend-specific optimizations than doing so
 * in the GLSL IR or in the native code.
29 */ 30extern "C" { 31 32#include <sys/types.h> 33 34#include "main/macros.h" 35#include "main/shaderobj.h" 36#include "main/uniforms.h" 37#include "program/prog_parameter.h" 38#include "program/prog_print.h" 39#include "program/prog_optimize.h" 40#include "program/register_allocate.h" 41#include "program/sampler.h" 42#include "program/hash_table.h" 43#include "brw_context.h" 44#include "brw_eu.h" 45#include "brw_wm.h" 46} 47#include "brw_shader.h" 48#include "brw_fs.h" 49#include "glsl/glsl_types.h" 50#include "glsl/ir_optimization.h" 51#include "glsl/ir_print_visitor.h" 52 53void 54fs_visitor::visit(ir_variable *ir) 55{ 56 fs_reg *reg = NULL; 57 58 if (variable_storage(ir)) 59 return; 60 61 if (ir->mode == ir_var_in) { 62 if (!strcmp(ir->name, "gl_FragCoord")) { 63 reg = emit_fragcoord_interpolation(ir); 64 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 65 reg = emit_frontfacing_interpolation(ir); 66 } else { 67 reg = emit_general_interpolation(ir); 68 } 69 assert(reg); 70 hash_table_insert(this->variable_ht, reg, ir); 71 return; 72 } else if (ir->mode == ir_var_out) { 73 reg = new(this->mem_ctx) fs_reg(this, ir->type); 74 75 if (ir->index > 0) { 76 assert(ir->location == FRAG_RESULT_DATA0); 77 assert(ir->index == 1); 78 this->dual_src_output = *reg; 79 } else if (ir->location == FRAG_RESULT_COLOR) { 80 /* Writing gl_FragColor outputs to all color regions. */ 81 for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) { 82 this->outputs[i] = *reg; 83 this->output_components[i] = 4; 84 } 85 } else if (ir->location == FRAG_RESULT_DEPTH) { 86 this->frag_depth = ir; 87 } else { 88 /* gl_FragData or a user-defined FS output */ 89 assert(ir->location >= FRAG_RESULT_DATA0 && 90 ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); 91 92 int vector_elements = 93 ir->type->is_array() ? ir->type->fields.array->vector_elements 94 : ir->type->vector_elements; 95 96 /* General color output. 
*/ 97 for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) { 98 int output = ir->location - FRAG_RESULT_DATA0 + i; 99 this->outputs[output] = *reg; 100 this->outputs[output].reg_offset += vector_elements * i; 101 this->output_components[output] = vector_elements; 102 } 103 } 104 } else if (ir->mode == ir_var_uniform) { 105 int param_index = c->prog_data.nr_params; 106 107 if (c->dispatch_width == 16) { 108 if (!variable_storage(ir)) { 109 fail("Failed to find uniform '%s' in 16-wide\n", ir->name); 110 } 111 return; 112 } 113 114 if (!strncmp(ir->name, "gl_", 3)) { 115 setup_builtin_uniform_values(ir); 116 } else { 117 setup_uniform_values(ir->location, ir->type); 118 } 119 120 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 121 reg->type = brw_type_for_base_type(ir->type); 122 } 123 124 if (!reg) 125 reg = new(this->mem_ctx) fs_reg(this, ir->type); 126 127 hash_table_insert(this->variable_ht, reg, ir); 128} 129 130void 131fs_visitor::visit(ir_dereference_variable *ir) 132{ 133 fs_reg *reg = variable_storage(ir->var); 134 this->result = *reg; 135} 136 137void 138fs_visitor::visit(ir_dereference_record *ir) 139{ 140 const glsl_type *struct_type = ir->record->type; 141 142 ir->record->accept(this); 143 144 unsigned int offset = 0; 145 for (unsigned int i = 0; i < struct_type->length; i++) { 146 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 147 break; 148 offset += type_size(struct_type->fields.structure[i].type); 149 } 150 this->result.reg_offset += offset; 151 this->result.type = brw_type_for_base_type(ir->type); 152} 153 154void 155fs_visitor::visit(ir_dereference_array *ir) 156{ 157 ir_constant *index; 158 int element_size; 159 160 ir->array->accept(this); 161 index = ir->array_index->as_constant(); 162 163 element_size = type_size(ir->type); 164 this->result.type = brw_type_for_base_type(ir->type); 165 166 if (index) { 167 assert(this->result.file == UNIFORM || this->result.file == GRF); 168 this->result.reg_offset += 
index->value.i[0] * element_size; 169 } else { 170 assert(!"FINISHME: non-constant array element"); 171 } 172} 173 174/* Instruction selection: Produce a MOV.sat instead of 175 * MIN(MAX(val, 0), 1) when possible. 176 */ 177bool 178fs_visitor::try_emit_saturate(ir_expression *ir) 179{ 180 ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); 181 182 if (!sat_val) 183 return false; 184 185 fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail(); 186 187 sat_val->accept(this); 188 fs_reg src = this->result; 189 190 fs_inst *last_inst = (fs_inst *) this->instructions.get_tail(); 191 192 /* If the last instruction from our accept() didn't generate our 193 * src, generate a saturated MOV 194 */ 195 fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); 196 if (!modify || modify->regs_written() != 1) { 197 fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src); 198 inst->saturate = true; 199 } else { 200 modify->saturate = true; 201 this->result = src; 202 } 203 204 205 return true; 206} 207 208bool 209fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) 210{ 211 /* 3-src instructions were introduced in gen6. */ 212 if (intel->gen < 6) 213 return false; 214 215 /* MAD can only handle floating-point data. 
*/ 216 if (ir->type != glsl_type::float_type) 217 return false; 218 219 ir_rvalue *nonmul = ir->operands[1 - mul_arg]; 220 ir_expression *mul = ir->operands[mul_arg]->as_expression(); 221 222 if (!mul || mul->operation != ir_binop_mul) 223 return false; 224 225 if (nonmul->as_constant() || 226 mul->operands[0]->as_constant() || 227 mul->operands[1]->as_constant()) 228 return false; 229 230 nonmul->accept(this); 231 fs_reg src0 = this->result; 232 233 mul->operands[0]->accept(this); 234 fs_reg src1 = this->result; 235 236 mul->operands[1]->accept(this); 237 fs_reg src2 = this->result; 238 239 this->result = fs_reg(this, ir->type); 240 emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); 241 242 return true; 243} 244 245void 246fs_visitor::visit(ir_expression *ir) 247{ 248 unsigned int operand; 249 fs_reg op[2], temp; 250 fs_inst *inst; 251 252 assert(ir->get_num_operands() <= 2); 253 254 if (try_emit_saturate(ir)) 255 return; 256 if (ir->operation == ir_binop_add) { 257 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1)) 258 return; 259 } 260 261 for (operand = 0; operand < ir->get_num_operands(); operand++) { 262 ir->operands[operand]->accept(this); 263 if (this->result.file == BAD_FILE) { 264 ir_print_visitor v; 265 fail("Failed to get tree for expression operand:\n"); 266 ir->operands[operand]->accept(&v); 267 } 268 op[operand] = this->result; 269 270 /* Matrix expression operands should have been broken down to vector 271 * operations already. 272 */ 273 assert(!ir->operands[operand]->type->is_matrix()); 274 /* And then those vector operands should have been broken down to scalar. 275 */ 276 assert(!ir->operands[operand]->type->is_vector()); 277 } 278 279 /* Storage for our result. If our result goes into an assignment, it will 280 * just get copy-propagated out, so no worries. 
281 */ 282 this->result = fs_reg(this, ir->type); 283 284 switch (ir->operation) { 285 case ir_unop_logic_not: 286 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 287 * ones complement of the whole register, not just bit 0. 288 */ 289 emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)); 290 break; 291 case ir_unop_neg: 292 op[0].negate = !op[0].negate; 293 this->result = op[0]; 294 break; 295 case ir_unop_abs: 296 op[0].abs = true; 297 op[0].negate = false; 298 this->result = op[0]; 299 break; 300 case ir_unop_sign: 301 temp = fs_reg(this, ir->type); 302 303 emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)); 304 305 inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); 306 inst->conditional_mod = BRW_CONDITIONAL_G; 307 inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)); 308 inst->predicated = true; 309 310 inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); 311 inst->conditional_mod = BRW_CONDITIONAL_L; 312 inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)); 313 inst->predicated = true; 314 315 break; 316 case ir_unop_rcp: 317 emit_math(SHADER_OPCODE_RCP, this->result, op[0]); 318 break; 319 320 case ir_unop_exp2: 321 emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); 322 break; 323 case ir_unop_log2: 324 emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); 325 break; 326 case ir_unop_exp: 327 case ir_unop_log: 328 assert(!"not reached: should be handled by ir_explog_to_explog2"); 329 break; 330 case ir_unop_sin: 331 case ir_unop_sin_reduced: 332 emit_math(SHADER_OPCODE_SIN, this->result, op[0]); 333 break; 334 case ir_unop_cos: 335 case ir_unop_cos_reduced: 336 emit_math(SHADER_OPCODE_COS, this->result, op[0]); 337 break; 338 339 case ir_unop_dFdx: 340 emit(FS_OPCODE_DDX, this->result, op[0]); 341 break; 342 case ir_unop_dFdy: 343 emit(FS_OPCODE_DDY, this->result, op[0]); 344 break; 345 346 case ir_binop_add: 347 emit(BRW_OPCODE_ADD, this->result, op[0], op[1]); 348 break; 349 case ir_binop_sub: 350 assert(!"not 
reached: should be handled by ir_sub_to_add_neg"); 351 break; 352 353 case ir_binop_mul: 354 if (ir->type->is_integer()) { 355 /* For integer multiplication, the MUL uses the low 16 bits 356 * of one of the operands (src0 on gen6, src1 on gen7). The 357 * MACH accumulates in the contribution of the upper 16 bits 358 * of that operand. 359 * 360 * FINISHME: Emit just the MUL if we know an operand is small 361 * enough. 362 */ 363 if (intel->gen >= 7 && c->dispatch_width == 16) 364 fail("16-wide explicit accumulator operands unsupported\n"); 365 366 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); 367 368 emit(BRW_OPCODE_MUL, acc, op[0], op[1]); 369 emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]); 370 emit(BRW_OPCODE_MOV, this->result, fs_reg(acc)); 371 } else { 372 emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); 373 } 374 break; 375 case ir_binop_div: 376 if (intel->gen >= 7 && c->dispatch_width == 16) 377 fail("16-wide INTDIV unsupported\n"); 378 379 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ 380 assert(ir->type->is_integer()); 381 emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); 382 break; 383 case ir_binop_mod: 384 if (intel->gen >= 7 && c->dispatch_width == 16) 385 fail("16-wide INTDIV unsupported\n"); 386 387 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ 388 assert(ir->type->is_integer()); 389 emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); 390 break; 391 392 case ir_binop_less: 393 case ir_binop_greater: 394 case ir_binop_lequal: 395 case ir_binop_gequal: 396 case ir_binop_equal: 397 case ir_binop_all_equal: 398 case ir_binop_nequal: 399 case ir_binop_any_nequal: 400 temp = this->result; 401 /* original gen4 does implicit conversion before comparison. 
*/ 402 if (intel->gen < 5) 403 temp.type = op[0].type; 404 405 resolve_ud_negate(&op[0]); 406 resolve_ud_negate(&op[1]); 407 408 resolve_bool_comparison(ir->operands[0], &op[0]); 409 resolve_bool_comparison(ir->operands[1], &op[1]); 410 411 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 412 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 413 break; 414 415 case ir_binop_logic_xor: 416 emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); 417 break; 418 419 case ir_binop_logic_or: 420 emit(BRW_OPCODE_OR, this->result, op[0], op[1]); 421 break; 422 423 case ir_binop_logic_and: 424 emit(BRW_OPCODE_AND, this->result, op[0], op[1]); 425 break; 426 427 case ir_binop_dot: 428 case ir_unop_any: 429 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 430 break; 431 432 case ir_unop_noise: 433 assert(!"not reached: should be handled by lower_noise"); 434 break; 435 436 case ir_quadop_vector: 437 assert(!"not reached: should be handled by lower_quadop_vector"); 438 break; 439 440 case ir_unop_sqrt: 441 emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); 442 break; 443 444 case ir_unop_rsq: 445 emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); 446 break; 447 448 case ir_unop_bitcast_i2f: 449 case ir_unop_bitcast_u2f: 450 op[0].type = BRW_REGISTER_TYPE_F; 451 this->result = op[0]; 452 break; 453 case ir_unop_i2u: 454 case ir_unop_bitcast_f2u: 455 op[0].type = BRW_REGISTER_TYPE_UD; 456 this->result = op[0]; 457 break; 458 case ir_unop_u2i: 459 case ir_unop_bitcast_f2i: 460 op[0].type = BRW_REGISTER_TYPE_D; 461 this->result = op[0]; 462 break; 463 case ir_unop_i2f: 464 case ir_unop_u2f: 465 case ir_unop_f2i: 466 case ir_unop_f2u: 467 emit(BRW_OPCODE_MOV, this->result, op[0]); 468 break; 469 470 case ir_unop_b2i: 471 inst = emit(BRW_OPCODE_AND, this->result, op[0], fs_reg(1)); 472 break; 473 case ir_unop_b2f: 474 temp = fs_reg(this, glsl_type::int_type); 475 emit(BRW_OPCODE_AND, temp, op[0], fs_reg(1)); 476 emit(BRW_OPCODE_MOV, 
this->result, temp); 477 break; 478 479 case ir_unop_f2b: 480 case ir_unop_i2b: 481 temp = this->result; 482 /* original gen4 does implicit conversion before comparison. */ 483 if (intel->gen < 5) 484 temp.type = op[0].type; 485 486 resolve_ud_negate(&op[0]); 487 488 inst = emit(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)); 489 inst->conditional_mod = BRW_CONDITIONAL_NZ; 490 break; 491 492 case ir_unop_trunc: 493 emit(BRW_OPCODE_RNDZ, this->result, op[0]); 494 break; 495 case ir_unop_ceil: 496 op[0].negate = !op[0].negate; 497 inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); 498 this->result.negate = true; 499 break; 500 case ir_unop_floor: 501 inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); 502 break; 503 case ir_unop_fract: 504 inst = emit(BRW_OPCODE_FRC, this->result, op[0]); 505 break; 506 case ir_unop_round_even: 507 emit(BRW_OPCODE_RNDE, this->result, op[0]); 508 break; 509 510 case ir_binop_min: 511 resolve_ud_negate(&op[0]); 512 resolve_ud_negate(&op[1]); 513 514 if (intel->gen >= 6) { 515 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 516 inst->conditional_mod = BRW_CONDITIONAL_L; 517 } else { 518 /* Unalias the destination */ 519 this->result = fs_reg(this, ir->type); 520 521 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); 522 inst->conditional_mod = BRW_CONDITIONAL_L; 523 524 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 525 inst->predicated = true; 526 } 527 break; 528 case ir_binop_max: 529 resolve_ud_negate(&op[0]); 530 resolve_ud_negate(&op[1]); 531 532 if (intel->gen >= 6) { 533 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 534 inst->conditional_mod = BRW_CONDITIONAL_GE; 535 } else { 536 /* Unalias the destination */ 537 this->result = fs_reg(this, ir->type); 538 539 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); 540 inst->conditional_mod = BRW_CONDITIONAL_G; 541 542 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); 543 inst->predicated = true; 544 } 545 break; 546 547 case ir_binop_pow: 
548 emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); 549 break; 550 551 case ir_unop_bit_not: 552 inst = emit(BRW_OPCODE_NOT, this->result, op[0]); 553 break; 554 case ir_binop_bit_and: 555 inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]); 556 break; 557 case ir_binop_bit_xor: 558 inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); 559 break; 560 case ir_binop_bit_or: 561 inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]); 562 break; 563 564 case ir_binop_lshift: 565 inst = emit(BRW_OPCODE_SHL, this->result, op[0], op[1]); 566 break; 567 568 case ir_binop_rshift: 569 if (ir->type->base_type == GLSL_TYPE_INT) 570 inst = emit(BRW_OPCODE_ASR, this->result, op[0], op[1]); 571 else 572 inst = emit(BRW_OPCODE_SHR, this->result, op[0], op[1]); 573 break; 574 } 575} 576 577void 578fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 579 const glsl_type *type, bool predicated) 580{ 581 switch (type->base_type) { 582 case GLSL_TYPE_FLOAT: 583 case GLSL_TYPE_UINT: 584 case GLSL_TYPE_INT: 585 case GLSL_TYPE_BOOL: 586 for (unsigned int i = 0; i < type->components(); i++) { 587 l.type = brw_type_for_base_type(type); 588 r.type = brw_type_for_base_type(type); 589 590 if (predicated || !l.equals(r)) { 591 fs_inst *inst = emit(BRW_OPCODE_MOV, l, r); 592 inst->predicated = predicated; 593 } 594 595 l.reg_offset++; 596 r.reg_offset++; 597 } 598 break; 599 case GLSL_TYPE_ARRAY: 600 for (unsigned int i = 0; i < type->length; i++) { 601 emit_assignment_writes(l, r, type->fields.array, predicated); 602 } 603 break; 604 605 case GLSL_TYPE_STRUCT: 606 for (unsigned int i = 0; i < type->length; i++) { 607 emit_assignment_writes(l, r, type->fields.structure[i].type, 608 predicated); 609 } 610 break; 611 612 case GLSL_TYPE_SAMPLER: 613 break; 614 615 default: 616 assert(!"not reached"); 617 break; 618 } 619} 620 621/* If the RHS processing resulted in an instruction generating a 622 * temporary value, and it would be easy to rewrite the instruction to 623 * 
generate its result right into the LHS instead, do so. This ends 624 * up reliably removing instructions where it can be tricky to do so 625 * later without real UD chain information. 626 */ 627bool 628fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, 629 fs_reg dst, 630 fs_reg src, 631 fs_inst *pre_rhs_inst, 632 fs_inst *last_rhs_inst) 633{ 634 /* Only attempt if we're doing a direct assignment. */ 635 if (ir->condition || 636 !(ir->lhs->type->is_scalar() || 637 (ir->lhs->type->is_vector() && 638 ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) 639 return false; 640 641 /* Make sure the last instruction generated our source reg. */ 642 fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, 643 last_rhs_inst, 644 src); 645 if (!modify) 646 return false; 647 648 /* If last_rhs_inst wrote a different number of components than our LHS, 649 * we can't safely rewrite it. 650 */ 651 if (ir->lhs->type->vector_elements != modify->regs_written()) 652 return false; 653 654 /* Success! Rewrite the instruction. 
*/ 655 modify->dst = dst; 656 657 return true; 658} 659 660void 661fs_visitor::visit(ir_assignment *ir) 662{ 663 fs_reg l, r; 664 fs_inst *inst; 665 666 /* FINISHME: arrays on the lhs */ 667 ir->lhs->accept(this); 668 l = this->result; 669 670 fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail(); 671 672 ir->rhs->accept(this); 673 r = this->result; 674 675 fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail(); 676 677 assert(l.file != BAD_FILE); 678 assert(r.file != BAD_FILE); 679 680 if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) 681 return; 682 683 if (ir->condition) { 684 emit_bool_to_cond_code(ir->condition); 685 } 686 687 if (ir->lhs->type->is_scalar() || 688 ir->lhs->type->is_vector()) { 689 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 690 if (ir->write_mask & (1 << i)) { 691 inst = emit(BRW_OPCODE_MOV, l, r); 692 if (ir->condition) 693 inst->predicated = true; 694 r.reg_offset++; 695 } 696 l.reg_offset++; 697 } 698 } else { 699 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 700 } 701} 702 703fs_inst * 704fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 705 int sampler) 706{ 707 int mlen; 708 int base_mrf = 1; 709 bool simd16 = false; 710 fs_reg orig_dst; 711 712 /* g0 header. */ 713 mlen = 1; 714 715 if (ir->shadow_comparitor && ir->op != ir_txd) { 716 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 717 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 718 coordinate.reg_offset++; 719 } 720 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 721 mlen += 3; 722 723 if (ir->op == ir_tex) { 724 /* There's no plain shadow compare message, so we use shadow 725 * compare with a bias of 0.0. 
726 */ 727 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); 728 mlen++; 729 } else if (ir->op == ir_txb) { 730 ir->lod_info.bias->accept(this); 731 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 732 mlen++; 733 } else { 734 assert(ir->op == ir_txl); 735 ir->lod_info.lod->accept(this); 736 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 737 mlen++; 738 } 739 740 ir->shadow_comparitor->accept(this); 741 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 742 mlen++; 743 } else if (ir->op == ir_tex) { 744 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 745 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 746 coordinate.reg_offset++; 747 } 748 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 749 mlen += 3; 750 } else if (ir->op == ir_txd) { 751 ir->lod_info.grad.dPdx->accept(this); 752 fs_reg dPdx = this->result; 753 754 ir->lod_info.grad.dPdy->accept(this); 755 fs_reg dPdy = this->result; 756 757 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 758 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); 759 coordinate.reg_offset++; 760 } 761 /* the slots for u and v are always present, but r is optional */ 762 mlen += MAX2(ir->coordinate->type->vector_elements, 2); 763 764 /* P = u, v, r 765 * dPdx = dudx, dvdx, drdx 766 * dPdy = dudy, dvdy, drdy 767 * 768 * 1-arg: Does not exist. 
769 * 770 * 2-arg: dudx dvdx dudy dvdy 771 * dPdx.x dPdx.y dPdy.x dPdy.y 772 * m4 m5 m6 m7 773 * 774 * 3-arg: dudx dvdx drdx dudy dvdy drdy 775 * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z 776 * m5 m6 m7 m8 m9 m10 777 */ 778 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { 779 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 780 dPdx.reg_offset++; 781 } 782 mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); 783 784 for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { 785 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 786 dPdy.reg_offset++; 787 } 788 mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); 789 } else if (ir->op == ir_txs) { 790 /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ 791 simd16 = true; 792 ir->lod_info.lod->accept(this); 793 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); 794 mlen += 2; 795 } else { 796 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 797 * instructions. We'll need to do SIMD16 here. 798 */ 799 simd16 = true; 800 assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf); 801 802 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 803 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), 804 coordinate); 805 coordinate.reg_offset++; 806 } 807 808 /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to 809 * be necessary for TXF (ld), but seems wise to do for all messages. 810 */ 811 for (int i = ir->coordinate->type->vector_elements; i < 3; i++) { 812 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)); 813 } 814 815 /* lod/bias appears after u/v/r. 
*/ 816 mlen += 6; 817 818 if (ir->op == ir_txb) { 819 ir->lod_info.bias->accept(this); 820 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 821 mlen++; 822 } else { 823 ir->lod_info.lod->accept(this); 824 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, this->result.type), 825 this->result); 826 mlen++; 827 } 828 829 /* The unused upper half. */ 830 mlen++; 831 } 832 833 if (simd16) { 834 /* Now, since we're doing simd16, the return is 2 interleaved 835 * vec4s where the odd-indexed ones are junk. We'll need to move 836 * this weirdness around to the expected layout. 837 */ 838 orig_dst = dst; 839 const glsl_type *vec_type = 840 glsl_type::get_instance(ir->type->base_type, 4, 1); 841 dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2)); 842 dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type) 843 : BRW_REGISTER_TYPE_F; 844 } 845 846 fs_inst *inst = NULL; 847 switch (ir->op) { 848 case ir_tex: 849 inst = emit(SHADER_OPCODE_TEX, dst); 850 break; 851 case ir_txb: 852 inst = emit(FS_OPCODE_TXB, dst); 853 break; 854 case ir_txl: 855 inst = emit(SHADER_OPCODE_TXL, dst); 856 break; 857 case ir_txd: 858 inst = emit(SHADER_OPCODE_TXD, dst); 859 break; 860 case ir_txs: 861 inst = emit(SHADER_OPCODE_TXS, dst); 862 break; 863 case ir_txf: 864 inst = emit(SHADER_OPCODE_TXF, dst); 865 break; 866 } 867 inst->base_mrf = base_mrf; 868 inst->mlen = mlen; 869 inst->header_present = true; 870 871 if (simd16) { 872 for (int i = 0; i < 4; i++) { 873 emit(BRW_OPCODE_MOV, orig_dst, dst); 874 orig_dst.reg_offset++; 875 dst.reg_offset += 2; 876 } 877 } 878 879 return inst; 880} 881 882/* gen5's sampler has slots for u, v, r, array index, then optional 883 * parameters like shadow comparitor or LOD bias. If optional 884 * parameters aren't present, those base slots are optional and don't 885 * need to be included in the message. 886 * 887 * We don't fill in the unnecessary slots regardless, which may look 888 * surprising in the disassembly. 
889 */ 890fs_inst * 891fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 892 int sampler) 893{ 894 int mlen = 0; 895 int base_mrf = 2; 896 int reg_width = c->dispatch_width / 8; 897 bool header_present = false; 898 const int vector_elements = 899 ir->coordinate ? ir->coordinate->type->vector_elements : 0; 900 901 if (ir->offset != NULL && ir->op == ir_txf) { 902 /* It appears that the ld instruction used for txf does its 903 * address bounds check before adding in the offset. To work 904 * around this, just add the integer offset to the integer texel 905 * coordinate, and don't put the offset in the header. 906 */ 907 ir_constant *offset = ir->offset->as_constant(); 908 for (int i = 0; i < vector_elements; i++) { 909 emit(BRW_OPCODE_ADD, 910 fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), 911 coordinate, 912 offset->value.i[i]); 913 coordinate.reg_offset++; 914 } 915 } else { 916 if (ir->offset) { 917 /* The offsets set up by the ir_texture visitor are in the 918 * m1 header, so we can't go headerless. 
919 */ 920 header_present = true; 921 mlen++; 922 base_mrf--; 923 } 924 925 for (int i = 0; i < vector_elements; i++) { 926 emit(BRW_OPCODE_MOV, 927 fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type), 928 coordinate); 929 coordinate.reg_offset++; 930 } 931 } 932 mlen += vector_elements * reg_width; 933 934 if (ir->shadow_comparitor && ir->op != ir_txd) { 935 mlen = MAX2(mlen, header_present + 4 * reg_width); 936 937 ir->shadow_comparitor->accept(this); 938 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 939 mlen += reg_width; 940 } 941 942 fs_inst *inst = NULL; 943 switch (ir->op) { 944 case ir_tex: 945 inst = emit(SHADER_OPCODE_TEX, dst); 946 break; 947 case ir_txb: 948 ir->lod_info.bias->accept(this); 949 mlen = MAX2(mlen, header_present + 4 * reg_width); 950 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 951 mlen += reg_width; 952 953 inst = emit(FS_OPCODE_TXB, dst); 954 955 break; 956 case ir_txl: 957 ir->lod_info.lod->accept(this); 958 mlen = MAX2(mlen, header_present + 4 * reg_width); 959 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 960 mlen += reg_width; 961 962 inst = emit(SHADER_OPCODE_TXL, dst); 963 break; 964 case ir_txd: { 965 ir->lod_info.grad.dPdx->accept(this); 966 fs_reg dPdx = this->result; 967 968 ir->lod_info.grad.dPdy->accept(this); 969 fs_reg dPdy = this->result; 970 971 mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */ 972 973 /** 974 * P = u, v, r 975 * dPdx = dudx, dvdx, drdx 976 * dPdy = dudy, dvdy, drdy 977 * 978 * Load up these values: 979 * - dudx dudy dvdx dvdy drdx drdy 980 * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z 981 */ 982 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { 983 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 984 dPdx.reg_offset++; 985 mlen += reg_width; 986 987 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 988 dPdy.reg_offset++; 989 mlen += reg_width; 990 } 991 992 inst = 
emit(SHADER_OPCODE_TXD, dst); 993 break; 994 } 995 case ir_txs: 996 ir->lod_info.lod->accept(this); 997 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); 998 mlen += reg_width; 999 inst = emit(SHADER_OPCODE_TXS, dst); 1000 break; 1001 case ir_txf: 1002 mlen = header_present + 4 * reg_width; 1003 1004 ir->lod_info.lod->accept(this); 1005 emit(BRW_OPCODE_MOV, 1006 fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), 1007 this->result); 1008 inst = emit(SHADER_OPCODE_TXF, dst); 1009 break; 1010 } 1011 inst->base_mrf = base_mrf; 1012 inst->mlen = mlen; 1013 inst->header_present = header_present; 1014 1015 if (mlen > 11) { 1016 fail("Message length >11 disallowed by hardware\n"); 1017 } 1018 1019 return inst; 1020} 1021 1022fs_inst * 1023fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 1024 int sampler) 1025{ 1026 int mlen = 0; 1027 int base_mrf = 2; 1028 int reg_width = c->dispatch_width / 8; 1029 bool header_present = false; 1030 int offsets[3]; 1031 1032 if (ir->offset && ir->op != ir_txf) { 1033 /* The offsets set up by the ir_texture visitor are in the 1034 * m1 header, so we can't go headerless. 
1035 */ 1036 header_present = true; 1037 mlen++; 1038 base_mrf--; 1039 } 1040 1041 if (ir->shadow_comparitor && ir->op != ir_txd) { 1042 ir->shadow_comparitor->accept(this); 1043 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 1044 mlen += reg_width; 1045 } 1046 1047 /* Set up the LOD info */ 1048 switch (ir->op) { 1049 case ir_tex: 1050 break; 1051 case ir_txb: 1052 ir->lod_info.bias->accept(this); 1053 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 1054 mlen += reg_width; 1055 break; 1056 case ir_txl: 1057 ir->lod_info.lod->accept(this); 1058 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); 1059 mlen += reg_width; 1060 break; 1061 case ir_txd: { 1062 if (c->dispatch_width == 16) 1063 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); 1064 1065 ir->lod_info.grad.dPdx->accept(this); 1066 fs_reg dPdx = this->result; 1067 1068 ir->lod_info.grad.dPdy->accept(this); 1069 fs_reg dPdy = this->result; 1070 1071 /* Load dPdx and the coordinate together: 1072 * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z 1073 */ 1074 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1075 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); 1076 coordinate.reg_offset++; 1077 mlen += reg_width; 1078 1079 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); 1080 dPdx.reg_offset++; 1081 mlen += reg_width; 1082 1083 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); 1084 dPdy.reg_offset++; 1085 mlen += reg_width; 1086 } 1087 break; 1088 } 1089 case ir_txs: 1090 ir->lod_info.lod->accept(this); 1091 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); 1092 mlen += reg_width; 1093 break; 1094 case ir_txf: 1095 /* It appears that the ld instruction used for txf does its 1096 * address bounds check before adding in the offset. 
To work 1097 * around this, just add the integer offset to the integer texel 1098 * coordinate, and don't put the offset in the header. 1099 */ 1100 if (ir->offset) { 1101 ir_constant *offset = ir->offset->as_constant(); 1102 offsets[0] = offset->value.i[0]; 1103 offsets[1] = offset->value.i[1]; 1104 offsets[2] = offset->value.i[2]; 1105 } else { 1106 memset(offsets, 0, sizeof(offsets)); 1107 } 1108 1109 /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */ 1110 emit(BRW_OPCODE_ADD, 1111 fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[0]); 1112 coordinate.reg_offset++; 1113 mlen += reg_width; 1114 1115 ir->lod_info.lod->accept(this); 1116 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), this->result); 1117 mlen += reg_width; 1118 1119 for (int i = 1; i < ir->coordinate->type->vector_elements; i++) { 1120 emit(BRW_OPCODE_ADD, 1121 fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate, offsets[i]); 1122 coordinate.reg_offset++; 1123 mlen += reg_width; 1124 } 1125 break; 1126 } 1127 1128 /* Set up the coordinate (except for cases where it was done above) */ 1129 if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf) { 1130 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1131 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); 1132 coordinate.reg_offset++; 1133 mlen += reg_width; 1134 } 1135 } 1136 1137 /* Generate the SEND */ 1138 fs_inst *inst = NULL; 1139 switch (ir->op) { 1140 case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break; 1141 case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break; 1142 case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break; 1143 case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break; 1144 case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break; 1145 case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break; 1146 } 1147 inst->base_mrf = base_mrf; 1148 inst->mlen = mlen; 1149 inst->header_present = header_present; 1150 1151 if (mlen 
> 11) { 1152 fail("Message length >11 disallowed by hardware\n"); 1153 } 1154 1155 return inst; 1156} 1157 1158void 1159fs_visitor::visit(ir_texture *ir) 1160{ 1161 fs_inst *inst = NULL; 1162 1163 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &fp->Base); 1164 sampler = fp->Base.SamplerUnits[sampler]; 1165 1166 /* Our hardware doesn't have a sample_d_c message, so shadow compares 1167 * for textureGrad/TXD need to be emulated with instructions. 1168 */ 1169 bool hw_compare_supported = ir->op != ir_txd; 1170 if (ir->shadow_comparitor && !hw_compare_supported) { 1171 assert(c->key.tex.compare_funcs[sampler] != GL_NONE); 1172 /* No need to even sample for GL_ALWAYS or GL_NEVER...bail early */ 1173 if (c->key.tex.compare_funcs[sampler] == GL_ALWAYS) 1174 return swizzle_result(ir, fs_reg(1.0f), sampler); 1175 else if (c->key.tex.compare_funcs[sampler] == GL_NEVER) 1176 return swizzle_result(ir, fs_reg(0.0f), sampler); 1177 } 1178 1179 if (ir->coordinate) 1180 ir->coordinate->accept(this); 1181 fs_reg coordinate = this->result; 1182 1183 if (ir->offset != NULL && !(intel->gen == 7 && ir->op == ir_txf)) { 1184 uint32_t offset_bits = brw_texture_offset(ir->offset->as_constant()); 1185 1186 /* Explicitly set up the message header by copying g0 to msg reg m1. */ 1187 emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), 1188 fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD))); 1189 1190 /* Then set the offset bits in DWord 2 of the message header. */ 1191 emit(BRW_OPCODE_MOV, 1192 fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), 1193 BRW_REGISTER_TYPE_UD)), 1194 fs_reg(brw_imm_uw(offset_bits))); 1195 } 1196 1197 /* Should be lowered by do_lower_texture_projection */ 1198 assert(!ir->projector); 1199 1200 bool needs_gl_clamp = true; 1201 1202 fs_reg scale_x, scale_y; 1203 1204 /* The 965 requires the EU to do the normalization of GL rectangle 1205 * texture coordinates. 
We use the program parameter state 1206 * tracking to get the scaling factor. 1207 */ 1208 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT && 1209 (intel->gen < 6 || 1210 (intel->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) || 1211 c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) { 1212 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1213 int tokens[STATE_LENGTH] = { 1214 STATE_INTERNAL, 1215 STATE_TEXRECT_SCALE, 1216 sampler, 1217 0, 1218 0 1219 }; 1220 1221 if (c->dispatch_width == 16) { 1222 fail("rectangle scale uniform setup not supported on 16-wide\n"); 1223 this->result = fs_reg(this, ir->type); 1224 return; 1225 } 1226 1227 c->prog_data.param_convert[c->prog_data.nr_params] = 1228 PARAM_NO_CONVERT; 1229 c->prog_data.param_convert[c->prog_data.nr_params + 1] = 1230 PARAM_NO_CONVERT; 1231 1232 scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1233 scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1234 1235 GLuint index = _mesa_add_state_reference(params, 1236 (gl_state_index *)tokens); 1237 1238 this->param_index[c->prog_data.nr_params] = index; 1239 this->param_offset[c->prog_data.nr_params] = 0; 1240 c->prog_data.nr_params++; 1241 this->param_index[c->prog_data.nr_params] = index; 1242 this->param_offset[c->prog_data.nr_params] = 1; 1243 c->prog_data.nr_params++; 1244 } 1245 1246 /* The 965 requires the EU to do the normalization of GL rectangle 1247 * texture coordinates. We use the program parameter state 1248 * tracking to get the scaling factor. 
1249 */ 1250 if (intel->gen < 6 && 1251 ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1252 fs_reg dst = fs_reg(this, ir->coordinate->type); 1253 fs_reg src = coordinate; 1254 coordinate = dst; 1255 1256 emit(BRW_OPCODE_MUL, dst, src, scale_x); 1257 dst.reg_offset++; 1258 src.reg_offset++; 1259 emit(BRW_OPCODE_MUL, dst, src, scale_y); 1260 } else if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1261 /* On gen6+, the sampler handles the rectangle coordinates 1262 * natively, without needing rescaling. But that means we have 1263 * to do GL_CLAMP clamping at the [0, width], [0, height] scale, 1264 * not [0, 1] like the default case below. 1265 */ 1266 needs_gl_clamp = false; 1267 1268 for (int i = 0; i < 2; i++) { 1269 if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) { 1270 fs_reg chan = coordinate; 1271 chan.reg_offset += i; 1272 1273 inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0)); 1274 inst->conditional_mod = BRW_CONDITIONAL_G; 1275 1276 /* Our parameter comes in as 1.0/width or 1.0/height, 1277 * because that's what people normally want for doing 1278 * texture rectangle handling. We need width or height 1279 * for clamping, but we don't care enough to make a new 1280 * parameter type, so just invert back. 1281 */ 1282 fs_reg limit = fs_reg(this, glsl_type::float_type); 1283 emit(BRW_OPCODE_MOV, limit, i == 0 ? 
scale_x : scale_y); 1284 emit(SHADER_OPCODE_RCP, limit, limit); 1285 1286 inst = emit(BRW_OPCODE_SEL, chan, chan, limit); 1287 inst->conditional_mod = BRW_CONDITIONAL_L; 1288 } 1289 } 1290 } 1291 1292 if (ir->coordinate && needs_gl_clamp) { 1293 for (unsigned int i = 0; 1294 i < MIN2(ir->coordinate->type->vector_elements, 3); i++) { 1295 if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) { 1296 fs_reg chan = coordinate; 1297 chan.reg_offset += i; 1298 1299 fs_inst *inst = emit(BRW_OPCODE_MOV, chan, chan); 1300 inst->saturate = true; 1301 } 1302 } 1303 } 1304 1305 /* Writemasking doesn't eliminate channels on SIMD8 texture 1306 * samples, so don't worry about them. 1307 */ 1308 fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1)); 1309 1310 if (intel->gen >= 7) { 1311 inst = emit_texture_gen7(ir, dst, coordinate, sampler); 1312 } else if (intel->gen >= 5) { 1313 inst = emit_texture_gen5(ir, dst, coordinate, sampler); 1314 } else { 1315 inst = emit_texture_gen4(ir, dst, coordinate, sampler); 1316 } 1317 1318 /* If there's an offset, we already set up m1. To avoid the implied move, 1319 * use the null register. Otherwise, we want an implied move from g0. 1320 */ 1321 if (ir->offset != NULL || !inst->header_present) 1322 inst->src[0] = reg_undef; 1323 else 1324 inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 1325 1326 inst->sampler = sampler; 1327 1328 if (ir->shadow_comparitor) { 1329 if (hw_compare_supported) { 1330 inst->shadow_compare = true; 1331 } else { 1332 ir->shadow_comparitor->accept(this); 1333 fs_reg ref = this->result; 1334 1335 fs_reg value = dst; 1336 dst = fs_reg(this, glsl_type::vec4_type); 1337 1338 /* FINISHME: This needs to be done pre-filtering. 
*/ 1339 1340 uint32_t conditional = 0; 1341 switch (c->key.tex.compare_funcs[sampler]) { 1342 /* GL_ALWAYS and GL_NEVER were handled at the top of the function */ 1343 case GL_LESS: conditional = BRW_CONDITIONAL_L; break; 1344 case GL_GREATER: conditional = BRW_CONDITIONAL_G; break; 1345 case GL_LEQUAL: conditional = BRW_CONDITIONAL_LE; break; 1346 case GL_GEQUAL: conditional = BRW_CONDITIONAL_GE; break; 1347 case GL_EQUAL: conditional = BRW_CONDITIONAL_EQ; break; 1348 case GL_NOTEQUAL: conditional = BRW_CONDITIONAL_NEQ; break; 1349 default: assert(!"Should not get here: bad shadow compare function"); 1350 } 1351 1352 /* Use conditional moves to load 0 or 1 as the result */ 1353 this->current_annotation = "manual shadow comparison"; 1354 for (int i = 0; i < 4; i++) { 1355 inst = emit(BRW_OPCODE_MOV, dst, fs_reg(0.0f)); 1356 1357 inst = emit(BRW_OPCODE_CMP, reg_null_f, ref, value); 1358 inst->conditional_mod = conditional; 1359 1360 inst = emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f)); 1361 inst->predicated = true; 1362 1363 dst.reg_offset++; 1364 value.reg_offset++; 1365 } 1366 dst.reg_offset = 0; 1367 } 1368 } 1369 1370 swizzle_result(ir, dst, sampler); 1371} 1372 1373/** 1374 * Swizzle the result of a texture result. This is necessary for 1375 * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. 1376 */ 1377void 1378fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) 1379{ 1380 this->result = orig_val; 1381 1382 if (ir->op == ir_txs) 1383 return; 1384 1385 if (ir->type == glsl_type::float_type) { 1386 /* Ignore DEPTH_TEXTURE_MODE swizzling. 
*/ 1387 assert(ir->sampler->type->sampler_shadow); 1388 } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) { 1389 fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type); 1390 1391 for (int i = 0; i < 4; i++) { 1392 int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i); 1393 fs_reg l = swizzled_result; 1394 l.reg_offset += i; 1395 1396 if (swiz == SWIZZLE_ZERO) { 1397 emit(BRW_OPCODE_MOV, l, fs_reg(0.0f)); 1398 } else if (swiz == SWIZZLE_ONE) { 1399 emit(BRW_OPCODE_MOV, l, fs_reg(1.0f)); 1400 } else { 1401 fs_reg r = orig_val; 1402 r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i); 1403 emit(BRW_OPCODE_MOV, l, r); 1404 } 1405 } 1406 this->result = swizzled_result; 1407 } 1408} 1409 1410void 1411fs_visitor::visit(ir_swizzle *ir) 1412{ 1413 ir->val->accept(this); 1414 fs_reg val = this->result; 1415 1416 if (ir->type->vector_elements == 1) { 1417 this->result.reg_offset += ir->mask.x; 1418 return; 1419 } 1420 1421 fs_reg result = fs_reg(this, ir->type); 1422 this->result = result; 1423 1424 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1425 fs_reg channel = val; 1426 int swiz = 0; 1427 1428 switch (i) { 1429 case 0: 1430 swiz = ir->mask.x; 1431 break; 1432 case 1: 1433 swiz = ir->mask.y; 1434 break; 1435 case 2: 1436 swiz = ir->mask.z; 1437 break; 1438 case 3: 1439 swiz = ir->mask.w; 1440 break; 1441 } 1442 1443 channel.reg_offset += swiz; 1444 emit(BRW_OPCODE_MOV, result, channel); 1445 result.reg_offset++; 1446 } 1447} 1448 1449void 1450fs_visitor::visit(ir_discard *ir) 1451{ 1452 assert(ir->condition == NULL); /* FINISHME */ 1453 1454 emit(FS_OPCODE_DISCARD); 1455 kill_emitted = true; 1456} 1457 1458void 1459fs_visitor::visit(ir_constant *ir) 1460{ 1461 /* Set this->result to reg at the bottom of the function because some code 1462 * paths will cause this visitor to be applied to other fields. This will 1463 * cause the value stored in this->result to be modified. 
1464 * 1465 * Make reg constant so that it doesn't get accidentally modified along the 1466 * way. Yes, I actually had this problem. :( 1467 */ 1468 const fs_reg reg(this, ir->type); 1469 fs_reg dst_reg = reg; 1470 1471 if (ir->type->is_array()) { 1472 const unsigned size = type_size(ir->type->fields.array); 1473 1474 for (unsigned i = 0; i < ir->type->length; i++) { 1475 ir->array_elements[i]->accept(this); 1476 fs_reg src_reg = this->result; 1477 1478 dst_reg.type = src_reg.type; 1479 for (unsigned j = 0; j < size; j++) { 1480 emit(BRW_OPCODE_MOV, dst_reg, src_reg); 1481 src_reg.reg_offset++; 1482 dst_reg.reg_offset++; 1483 } 1484 } 1485 } else if (ir->type->is_record()) { 1486 foreach_list(node, &ir->components) { 1487 ir_constant *const field = (ir_constant *) node; 1488 const unsigned size = type_size(field->type); 1489 1490 field->accept(this); 1491 fs_reg src_reg = this->result; 1492 1493 dst_reg.type = src_reg.type; 1494 for (unsigned j = 0; j < size; j++) { 1495 emit(BRW_OPCODE_MOV, dst_reg, src_reg); 1496 src_reg.reg_offset++; 1497 dst_reg.reg_offset++; 1498 } 1499 } 1500 } else { 1501 const unsigned size = type_size(ir->type); 1502 1503 for (unsigned i = 0; i < size; i++) { 1504 switch (ir->type->base_type) { 1505 case GLSL_TYPE_FLOAT: 1506 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])); 1507 break; 1508 case GLSL_TYPE_UINT: 1509 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])); 1510 break; 1511 case GLSL_TYPE_INT: 1512 emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])); 1513 break; 1514 case GLSL_TYPE_BOOL: 1515 emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])); 1516 break; 1517 default: 1518 assert(!"Non-float/uint/int/bool constant"); 1519 } 1520 dst_reg.reg_offset++; 1521 } 1522 } 1523 1524 this->result = reg; 1525} 1526 1527void 1528fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 1529{ 1530 ir_expression *expr = ir->as_expression(); 1531 1532 if (expr) { 1533 fs_reg op[2]; 1534 fs_inst *inst; 1535 1536 
assert(expr->get_num_operands() <= 2); 1537 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1538 assert(expr->operands[i]->type->is_scalar()); 1539 1540 expr->operands[i]->accept(this); 1541 op[i] = this->result; 1542 1543 resolve_ud_negate(&op[i]); 1544 } 1545 1546 switch (expr->operation) { 1547 case ir_unop_logic_not: 1548 inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)); 1549 inst->conditional_mod = BRW_CONDITIONAL_Z; 1550 break; 1551 1552 case ir_binop_logic_xor: 1553 case ir_binop_logic_or: 1554 case ir_binop_logic_and: 1555 goto out; 1556 1557 case ir_unop_f2b: 1558 if (intel->gen >= 6) { 1559 inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f)); 1560 } else { 1561 inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]); 1562 } 1563 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1564 break; 1565 1566 case ir_unop_i2b: 1567 if (intel->gen >= 6) { 1568 inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)); 1569 } else { 1570 inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]); 1571 } 1572 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1573 break; 1574 1575 case ir_binop_greater: 1576 case ir_binop_gequal: 1577 case ir_binop_less: 1578 case ir_binop_lequal: 1579 case ir_binop_equal: 1580 case ir_binop_all_equal: 1581 case ir_binop_nequal: 1582 case ir_binop_any_nequal: 1583 resolve_bool_comparison(expr->operands[0], &op[0]); 1584 resolve_bool_comparison(expr->operands[1], &op[1]); 1585 1586 inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]); 1587 inst->conditional_mod = 1588 brw_conditional_for_comparison(expr->operation); 1589 break; 1590 1591 default: 1592 assert(!"not reached"); 1593 fail("bad cond code\n"); 1594 break; 1595 } 1596 return; 1597 } 1598 1599out: 1600 ir->accept(this); 1601 1602 fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1)); 1603 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1604} 1605 1606/** 1607 * Emit a gen6 IF statement with the comparison folded into the IF 1608 * instruction. 
1609 */ 1610void 1611fs_visitor::emit_if_gen6(ir_if *ir) 1612{ 1613 ir_expression *expr = ir->condition->as_expression(); 1614 1615 if (expr) { 1616 fs_reg op[2]; 1617 fs_inst *inst; 1618 fs_reg temp; 1619 1620 assert(expr->get_num_operands() <= 2); 1621 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 1622 assert(expr->operands[i]->type->is_scalar()); 1623 1624 expr->operands[i]->accept(this); 1625 op[i] = this->result; 1626 } 1627 1628 switch (expr->operation) { 1629 case ir_unop_logic_not: 1630 inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0)); 1631 inst->conditional_mod = BRW_CONDITIONAL_Z; 1632 return; 1633 1634 case ir_binop_logic_xor: 1635 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); 1636 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1637 return; 1638 1639 case ir_binop_logic_or: 1640 temp = fs_reg(this, glsl_type::bool_type); 1641 emit(BRW_OPCODE_OR, temp, op[0], op[1]); 1642 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); 1643 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1644 return; 1645 1646 case ir_binop_logic_and: 1647 temp = fs_reg(this, glsl_type::bool_type); 1648 emit(BRW_OPCODE_AND, temp, op[0], op[1]); 1649 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); 1650 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1651 return; 1652 1653 case ir_unop_f2b: 1654 inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); 1655 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1656 return; 1657 1658 case ir_unop_i2b: 1659 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); 1660 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1661 return; 1662 1663 case ir_binop_greater: 1664 case ir_binop_gequal: 1665 case ir_binop_less: 1666 case ir_binop_lequal: 1667 case ir_binop_equal: 1668 case ir_binop_all_equal: 1669 case ir_binop_nequal: 1670 case ir_binop_any_nequal: 1671 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); 1672 inst->conditional_mod = 1673 brw_conditional_for_comparison(expr->operation); 1674 return; 1675 default: 1676 
assert(!"not reached"); 1677 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); 1678 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1679 fail("bad condition\n"); 1680 return; 1681 } 1682 return; 1683 } 1684 1685 ir->condition->accept(this); 1686 1687 fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)); 1688 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1689} 1690 1691void 1692fs_visitor::visit(ir_if *ir) 1693{ 1694 fs_inst *inst; 1695 1696 if (intel->gen < 6 && c->dispatch_width == 16) { 1697 fail("Can't support (non-uniform) control flow on 16-wide\n"); 1698 } 1699 1700 /* Don't point the annotation at the if statement, because then it plus 1701 * the then and else blocks get printed. 1702 */ 1703 this->base_ir = ir->condition; 1704 1705 if (intel->gen == 6) { 1706 emit_if_gen6(ir); 1707 } else { 1708 emit_bool_to_cond_code(ir->condition); 1709 1710 inst = emit(BRW_OPCODE_IF); 1711 inst->predicated = true; 1712 } 1713 1714 foreach_list(node, &ir->then_instructions) { 1715 ir_instruction *ir = (ir_instruction *)node; 1716 this->base_ir = ir; 1717 1718 ir->accept(this); 1719 } 1720 1721 if (!ir->else_instructions.is_empty()) { 1722 emit(BRW_OPCODE_ELSE); 1723 1724 foreach_list(node, &ir->else_instructions) { 1725 ir_instruction *ir = (ir_instruction *)node; 1726 this->base_ir = ir; 1727 1728 ir->accept(this); 1729 } 1730 } 1731 1732 emit(BRW_OPCODE_ENDIF); 1733} 1734 1735void 1736fs_visitor::visit(ir_loop *ir) 1737{ 1738 fs_reg counter = reg_undef; 1739 1740 if (intel->gen < 6 && c->dispatch_width == 16) { 1741 fail("Can't support (non-uniform) control flow on 16-wide\n"); 1742 } 1743 1744 if (ir->counter) { 1745 this->base_ir = ir->counter; 1746 ir->counter->accept(this); 1747 counter = *(variable_storage(ir->counter)); 1748 1749 if (ir->from) { 1750 this->base_ir = ir->from; 1751 ir->from->accept(this); 1752 1753 emit(BRW_OPCODE_MOV, counter, this->result); 1754 } 1755 } 1756 1757 this->base_ir = NULL; 1758 emit(BRW_OPCODE_DO); 1759 1760 if 
(ir->to) { 1761 this->base_ir = ir->to; 1762 ir->to->accept(this); 1763 1764 fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result); 1765 inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); 1766 1767 inst = emit(BRW_OPCODE_BREAK); 1768 inst->predicated = true; 1769 } 1770 1771 foreach_list(node, &ir->body_instructions) { 1772 ir_instruction *ir = (ir_instruction *)node; 1773 1774 this->base_ir = ir; 1775 ir->accept(this); 1776 } 1777 1778 if (ir->increment) { 1779 this->base_ir = ir->increment; 1780 ir->increment->accept(this); 1781 emit(BRW_OPCODE_ADD, counter, counter, this->result); 1782 } 1783 1784 this->base_ir = NULL; 1785 emit(BRW_OPCODE_WHILE); 1786} 1787 1788void 1789fs_visitor::visit(ir_loop_jump *ir) 1790{ 1791 switch (ir->mode) { 1792 case ir_loop_jump::jump_break: 1793 emit(BRW_OPCODE_BREAK); 1794 break; 1795 case ir_loop_jump::jump_continue: 1796 emit(BRW_OPCODE_CONTINUE); 1797 break; 1798 } 1799} 1800 1801void 1802fs_visitor::visit(ir_call *ir) 1803{ 1804 assert(!"FINISHME"); 1805} 1806 1807void 1808fs_visitor::visit(ir_return *ir) 1809{ 1810 assert(!"FINISHME"); 1811} 1812 1813void 1814fs_visitor::visit(ir_function *ir) 1815{ 1816 /* Ignore function bodies other than main() -- we shouldn't see calls to 1817 * them since they should all be inlined before we get to ir_to_mesa. 
1818 */ 1819 if (strcmp(ir->name, "main") == 0) { 1820 const ir_function_signature *sig; 1821 exec_list empty; 1822 1823 sig = ir->matching_signature(&empty); 1824 1825 assert(sig); 1826 1827 foreach_list(node, &sig->body) { 1828 ir_instruction *ir = (ir_instruction *)node; 1829 this->base_ir = ir; 1830 1831 ir->accept(this); 1832 } 1833 } 1834} 1835 1836void 1837fs_visitor::visit(ir_function_signature *ir) 1838{ 1839 assert(!"not reached"); 1840 (void)ir; 1841} 1842 1843fs_inst * 1844fs_visitor::emit(fs_inst inst) 1845{ 1846 fs_inst *list_inst = new(mem_ctx) fs_inst; 1847 *list_inst = inst; 1848 1849 if (force_uncompressed_stack > 0) 1850 list_inst->force_uncompressed = true; 1851 else if (force_sechalf_stack > 0) 1852 list_inst->force_sechalf = true; 1853 1854 list_inst->annotation = this->current_annotation; 1855 list_inst->ir = this->base_ir; 1856 1857 this->instructions.push_tail(list_inst); 1858 1859 return list_inst; 1860} 1861 1862/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1863void 1864fs_visitor::emit_dummy_fs() 1865{ 1866 int reg_width = c->dispatch_width / 8; 1867 1868 /* Everyone's favorite color. */ 1869 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)); 1870 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f)); 1871 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f)); 1872 emit(BRW_OPCODE_MOV, fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f)); 1873 1874 fs_inst *write; 1875 write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); 1876 write->base_mrf = 2; 1877 write->mlen = 4 * reg_width; 1878 write->eot = true; 1879} 1880 1881/* The register location here is relative to the start of the URB 1882 * data. It will get adjusted to be a real location before 1883 * generate_code() time. 
1884 */ 1885struct brw_reg 1886fs_visitor::interp_reg(int location, int channel) 1887{ 1888 int regnr = urb_setup[location] * 2 + channel / 2; 1889 int stride = (channel & 1) * 4; 1890 1891 assert(urb_setup[location] != -1); 1892 1893 return brw_vec1_grf(regnr, stride); 1894} 1895 1896/** Emits the interpolation for the varying inputs. */ 1897void 1898fs_visitor::emit_interpolation_setup_gen4() 1899{ 1900 this->current_annotation = "compute pixel centers"; 1901 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1902 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1903 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1904 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1905 1906 emit(FS_OPCODE_PIXEL_X, this->pixel_x); 1907 emit(FS_OPCODE_PIXEL_Y, this->pixel_y); 1908 1909 this->current_annotation = "compute pixel deltas from v0"; 1910 if (brw->has_pln) { 1911 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1912 fs_reg(this, glsl_type::vec2_type); 1913 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1914 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; 1915 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++; 1916 } else { 1917 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1918 fs_reg(this, glsl_type::float_type); 1919 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = 1920 fs_reg(this, glsl_type::float_type); 1921 } 1922 emit(BRW_OPCODE_ADD, this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1923 this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))); 1924 emit(BRW_OPCODE_ADD, this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1925 this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); 1926 1927 this->current_annotation = "compute pos.w and 1/pos.w"; 1928 /* Compute wpos.w. It's always in our setup, since it's needed to 1929 * interpolate the other attributes. 
1930 */ 1931 this->wpos_w = fs_reg(this, glsl_type::float_type); 1932 emit(FS_OPCODE_LINTERP, wpos_w, 1933 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1934 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1935 interp_reg(FRAG_ATTRIB_WPOS, 3)); 1936 /* Compute the pixel 1/W value from wpos.w. */ 1937 this->pixel_w = fs_reg(this, glsl_type::float_type); 1938 emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); 1939 this->current_annotation = NULL; 1940} 1941 1942/** Emits the interpolation for the varying inputs. */ 1943void 1944fs_visitor::emit_interpolation_setup_gen6() 1945{ 1946 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1947 1948 /* If the pixel centers end up used, the setup is the same as for gen4. */ 1949 this->current_annotation = "compute pixel centers"; 1950 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); 1951 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); 1952 int_pixel_x.type = BRW_REGISTER_TYPE_UW; 1953 int_pixel_y.type = BRW_REGISTER_TYPE_UW; 1954 emit(BRW_OPCODE_ADD, 1955 int_pixel_x, 1956 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1957 fs_reg(brw_imm_v(0x10101010))); 1958 emit(BRW_OPCODE_ADD, 1959 int_pixel_y, 1960 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1961 fs_reg(brw_imm_v(0x11001100))); 1962 1963 /* As of gen6, we can no longer mix float and int sources. We have 1964 * to turn the integer pixel centers into floats for their actual 1965 * use. 
1966 */ 1967 this->pixel_x = fs_reg(this, glsl_type::float_type); 1968 this->pixel_y = fs_reg(this, glsl_type::float_type); 1969 emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x); 1970 emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y); 1971 1972 this->current_annotation = "compute pos.w"; 1973 this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); 1974 this->wpos_w = fs_reg(this, glsl_type::float_type); 1975 emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); 1976 1977 for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) { 1978 uint8_t reg = c->barycentric_coord_reg[i]; 1979 this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0)); 1980 this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0)); 1981 } 1982 1983 this->current_annotation = NULL; 1984} 1985 1986void 1987fs_visitor::emit_color_write(int target, int index, int first_color_mrf) 1988{ 1989 int reg_width = c->dispatch_width / 8; 1990 fs_inst *inst; 1991 fs_reg color = outputs[target]; 1992 fs_reg mrf; 1993 1994 /* If there's no color data to be written, skip it. 
*/ 1995 if (color.file == BAD_FILE) 1996 return; 1997 1998 color.reg_offset += index; 1999 2000 if (c->dispatch_width == 8 || intel->gen >= 6) { 2001 /* SIMD8 write looks like: 2002 * m + 0: r0 2003 * m + 1: r1 2004 * m + 2: g0 2005 * m + 3: g1 2006 * 2007 * gen6 SIMD16 DP write looks like: 2008 * m + 0: r0 2009 * m + 1: r1 2010 * m + 2: g0 2011 * m + 3: g1 2012 * m + 4: b0 2013 * m + 5: b1 2014 * m + 6: a0 2015 * m + 7: a1 2016 */ 2017 inst = emit(BRW_OPCODE_MOV, 2018 fs_reg(MRF, first_color_mrf + index * reg_width, color.type), 2019 color); 2020 inst->saturate = c->key.clamp_fragment_color; 2021 } else { 2022 /* pre-gen6 SIMD16 single source DP write looks like: 2023 * m + 0: r0 2024 * m + 1: g0 2025 * m + 2: b0 2026 * m + 3: a0 2027 * m + 4: r1 2028 * m + 5: g1 2029 * m + 6: b1 2030 * m + 7: a1 2031 */ 2032 if (brw->has_compr4) { 2033 /* By setting the high bit of the MRF register number, we 2034 * indicate that we want COMPR4 mode - instead of doing the 2035 * usual destination + 1 for the second half we get 2036 * destination + 4. 
2037 */ 2038 inst = emit(BRW_OPCODE_MOV, 2039 fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index, 2040 color.type), 2041 color); 2042 inst->saturate = c->key.clamp_fragment_color; 2043 } else { 2044 push_force_uncompressed(); 2045 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index, 2046 color.type), 2047 color); 2048 inst->saturate = c->key.clamp_fragment_color; 2049 pop_force_uncompressed(); 2050 2051 push_force_sechalf(); 2052 color.sechalf = true; 2053 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4, 2054 color.type), 2055 color); 2056 inst->saturate = c->key.clamp_fragment_color; 2057 pop_force_sechalf(); 2058 color.sechalf = false; 2059 } 2060 } 2061} 2062 2063void 2064fs_visitor::emit_fb_writes() 2065{ 2066 this->current_annotation = "FB write header"; 2067 bool header_present = true; 2068 /* We can potentially have a message length of up to 15, so we have to set 2069 * base_mrf to either 0 or 1 in order to fit in m0..m15. 2070 */ 2071 int base_mrf = 1; 2072 int nr = base_mrf; 2073 int reg_width = c->dispatch_width / 8; 2074 bool do_dual_src = this->dual_src_output.file != BAD_FILE; 2075 2076 if (c->dispatch_width == 16 && do_dual_src) { 2077 fail("GL_ARB_blend_func_extended not yet supported in 16-wide."); 2078 do_dual_src = false; 2079 } 2080 2081 /* From the Sandy Bridge PRM, volume 4, page 198: 2082 * 2083 * "Dispatched Pixel Enables. One bit per pixel indicating 2084 * which pixels were originally enabled when the thread was 2085 * dispatched. This field is only required for the end-of- 2086 * thread message and on all dual-source messages." 
2087 */ 2088 if (intel->gen >= 6 && 2089 !this->kill_emitted && 2090 !do_dual_src && 2091 c->key.nr_color_regions == 1) { 2092 header_present = false; 2093 } 2094 2095 if (header_present) { 2096 /* m2, m3 header */ 2097 nr += 2; 2098 } 2099 2100 if (c->aa_dest_stencil_reg) { 2101 push_force_uncompressed(); 2102 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 2103 fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))); 2104 pop_force_uncompressed(); 2105 } 2106 2107 /* Reserve space for color. It'll be filled in per MRT below. */ 2108 int color_mrf = nr; 2109 nr += 4 * reg_width; 2110 if (do_dual_src) 2111 nr += 4; 2112 2113 if (c->source_depth_to_render_target) { 2114 if (intel->gen == 6 && c->dispatch_width == 16) { 2115 /* For outputting oDepth on gen6, SIMD8 writes have to be 2116 * used. This would require 8-wide moves of each half to 2117 * message regs, kind of like pre-gen5 SIMD16 FB writes. 2118 * Just bail on doing so for now. 2119 */ 2120 fail("Missing support for simd16 depth writes on gen6\n"); 2121 } 2122 2123 if (c->computes_depth) { 2124 /* Hand over gl_FragDepth. */ 2125 assert(this->frag_depth); 2126 fs_reg depth = *(variable_storage(this->frag_depth)); 2127 2128 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth); 2129 } else { 2130 /* Pass through the payload depth. 
*/ 2131 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), 2132 fs_reg(brw_vec8_grf(c->source_depth_reg, 0))); 2133 } 2134 nr += reg_width; 2135 } 2136 2137 if (c->dest_depth_reg) { 2138 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), 2139 fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))); 2140 nr += reg_width; 2141 } 2142 2143 if (do_dual_src) { 2144 fs_reg src0 = this->outputs[0]; 2145 fs_reg src1 = this->dual_src_output; 2146 2147 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2148 "FB write src0"); 2149 for (int i = 0; i < 4; i++) { 2150 fs_inst *inst = emit(BRW_OPCODE_MOV, 2151 fs_reg(MRF, color_mrf + i, src0.type), 2152 src0); 2153 src0.reg_offset++; 2154 inst->saturate = c->key.clamp_fragment_color; 2155 } 2156 2157 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2158 "FB write src1"); 2159 for (int i = 0; i < 4; i++) { 2160 fs_inst *inst = emit(BRW_OPCODE_MOV, 2161 fs_reg(MRF, color_mrf + 4 + i, src1.type), 2162 src1); 2163 src1.reg_offset++; 2164 inst->saturate = c->key.clamp_fragment_color; 2165 } 2166 2167 fs_inst *inst = emit(FS_OPCODE_FB_WRITE); 2168 inst->target = 0; 2169 inst->base_mrf = base_mrf; 2170 inst->mlen = nr - base_mrf; 2171 inst->eot = true; 2172 inst->header_present = header_present; 2173 2174 c->prog_data.dual_src_blend = true; 2175 this->current_annotation = NULL; 2176 return; 2177 } 2178 2179 for (int target = 0; target < c->key.nr_color_regions; target++) { 2180 this->current_annotation = ralloc_asprintf(this->mem_ctx, 2181 "FB write target %d", 2182 target); 2183 for (unsigned i = 0; i < this->output_components[target]; i++) 2184 emit_color_write(target, i, color_mrf); 2185 2186 fs_inst *inst = emit(FS_OPCODE_FB_WRITE); 2187 inst->target = target; 2188 inst->base_mrf = base_mrf; 2189 inst->mlen = nr - base_mrf; 2190 if (target == c->key.nr_color_regions - 1) 2191 inst->eot = true; 2192 inst->header_present = header_present; 2193 } 2194 2195 if (c->key.nr_color_regions == 0) { 2196 if (c->key.alpha_test) { 2197 /* If the alpha test is enabled 
but there's no color buffer, 2198 * we still need to send alpha out the pipeline to our null 2199 * renderbuffer. 2200 */ 2201 emit_color_write(0, 3, color_mrf); 2202 } 2203 2204 fs_inst *inst = emit(FS_OPCODE_FB_WRITE); 2205 inst->base_mrf = base_mrf; 2206 inst->mlen = nr - base_mrf; 2207 inst->eot = true; 2208 inst->header_present = header_present; 2209 } 2210 2211 this->current_annotation = NULL; 2212} 2213 2214void 2215fs_visitor::resolve_ud_negate(fs_reg *reg) 2216{ 2217 if (reg->type != BRW_REGISTER_TYPE_UD || 2218 !reg->negate) 2219 return; 2220 2221 fs_reg temp = fs_reg(this, glsl_type::uint_type); 2222 emit(BRW_OPCODE_MOV, temp, *reg); 2223 *reg = temp; 2224} 2225 2226void 2227fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) 2228{ 2229 if (rvalue->type != glsl_type::bool_type) 2230 return; 2231 2232 fs_reg temp = fs_reg(this, glsl_type::bool_type); 2233 emit(BRW_OPCODE_AND, temp, *reg, fs_reg(1)); 2234 *reg = temp; 2235} 2236