brw_fs_visitor.cpp revision 65b5cbbcf783f6c668ab5b31a0734680dd396794
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file brw_fs_visitor.cpp 25 * 26 * This file supports generating the FS LIR from the GLSL IR. The LIR 27 * makes it easier to do backend-specific optimizations than doing so 28 * in the GLSL IR or in the native code. 
 */
extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
}
#include "brw_shader.h"
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"

/* Allocate (or look up) backend storage for a GLSL variable and record the
 * ir -> fs_reg mapping in variable_ht.  Fragment inputs get interpolation
 * setup, uniforms get slots in the uniform param array, and anything else
 * gets a fresh virtual GRF.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   /* Already has storage (e.g. visited earlier, or set up by a previous
    * compile pass) -- nothing to do.
    */
   if (variable_storage(ir))
      return;

   /* Remember the built-in fragment output variables so later output
    * handling can identify them by pointer.
    */
   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      /* gl_FragCoord and gl_FrontFacing are computed specially; everything
       * else goes through the generic attribute interpolation path.
       */
      if (!strcmp(ir->name, "gl_FragCoord")) {
	 reg = emit_fragcoord_interpolation(ir);
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
	 reg = emit_frontfacing_interpolation(ir);
      } else {
	 reg = emit_general_interpolation(ir);
      }
      assert(reg);
      hash_table_insert(this->variable_ht, reg, ir);
      return;
   }

   if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      if (c->dispatch_width == 16) {
	 /* In 16-wide mode the uniform layout is not rebuilt -- storage is
	  * presumably established by the preceding 8-wide compile; here we
	  * only verify it exists.
	  */
	 if (!variable_storage(ir)) {
	    fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
	 }
	 return;
      }

      /* Built-in state (gl_* names) is looked up via state slots; ordinary
       * uniforms are laid out from their location and type.
       */
      if (!strncmp(ir->name, "gl_", 3)) {
	 setup_builtin_uniform_values(ir);
      } else {
	 setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
      reg->type = brw_type_for_base_type(ir->type);
   }

   /* Fallback: plain temporary/local storage in a fresh virtual GRF. */
   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}

/* Resolve a variable dereference to the storage registered above. */
void
fs_visitor::visit(ir_dereference_variable *ir)
{
   fs_reg *reg = variable_storage(ir->var);
   this->result = *reg;
}

/* Resolve a struct field access: visit the record to get its base register,
 * then advance reg_offset by the accumulated size of the preceding fields.
 */
void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   /* Sum the sizes (in type_size() register units) of the fields that come
    * before the one being accessed.
    */
   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
	 break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}

/* Resolve an array access.  Only constant indices are handled: the element
 * offset is folded into reg_offset.  Variable indexing is not implemented
 * yet (see the FINISHME assert).
 */
void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      assert(this->result.file == UNIFORM || this->result.file == GRF);
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}

/* Instruction selection: Produce a MOV.sat instead of
 * MIN(MAX(val, 0), 1) when possible.
 *
 * Returns true (and emits the saturating MOV) if @ir matched the
 * clamp-to-[0,1] pattern, false if the caller should emit it normally.
 */
bool
fs_visitor::try_emit_saturate(ir_expression *ir)
{
   ir_rvalue *sat_val = ir->as_rvalue_to_saturate();

   if (!sat_val)
      return false;

   this->result = reg_undef;
   sat_val->accept(this);
   fs_reg src = this->result;

   this->result = fs_reg(this, ir->type);
   fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
   inst->saturate = true;

   return true;
}

/* Emit code for a scalar expression.  Matrix and vector operands must have
 * been lowered to scalar operations before this point (asserted below).
 */
void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[2], temp;
   fs_inst *inst;

   assert(ir->get_num_operands() <= 2);

   if (try_emit_saturate(ir))
      return;

   /* This is where our caller would like us to put the result, if possible.
    */
   fs_reg saved_result_storage = this->result;

   /* Evaluate the operands first; each sub-visit leaves its value in
    * this->result.
    */
   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result = reg_undef;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
	 ir_print_visitor v;
	 fail("Failed to get tree for expression operand:\n");
	 ir->operands[operand]->accept(&v);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Inherit storage from our parent if possible, and otherwise we
    * alloc a temporary.
    */
   if (saved_result_storage.file == BAD_FILE) {
      this->result = fs_reg(this, ir->type);
   } else {
      this->result = saved_result_storage;
   }

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1));
      break;
   case ir_unop_neg:
      /* Negate/abs are free as source modifiers -- just flag the operand
       * instead of emitting an instruction.
       */
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;
   case ir_unop_sign:
      temp = fs_reg(this, ir->type);

      /* Unalias the destination.  (imagine a = sign(a)) */
      this->result = fs_reg(this, ir->type);

      /* sign(x): start with 0, then predicated-MOV 1 where x > 0 and
       * -1 where x < 0.
       */
      emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f));

      inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f));
      inst->predicated = true;

      inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f));
      inst->predicated = true;

      break;
   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, this->result, op[0]);
      break;

   case ir_unop_dFdx:
      emit(FS_OPCODE_DDX, this->result, op[0]);
      break;
   case ir_unop_dFdy:
      emit(FS_OPCODE_DDY, this->result, op[0]);
      break;

   case ir_binop_add:
      emit(BRW_OPCODE_ADD, this->result, op[0], op[1]);
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_all_equal:
   case ir_binop_nequal:
   case ir_binop_any_nequal:
      temp = this->result;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
	 temp.type = op[0].type;

      /* CMP writes all-ones per channel on pass (hardware convention);
       * AND with 1 converts that to the 0/1 boolean GLSL expects.
       */
      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
      emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1));
      break;

   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
      break;

   case ir_binop_dot:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
      break;

   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
      break;

   case ir_unop_i2u:
      /* int<->uint conversions are just a register-type reinterpretation;
       * no instruction needed.
       */
      op[0].type = BRW_REGISTER_TYPE_UD;
      this->result = op[0];
      break;
   case ir_unop_u2i:
      op[0].type = BRW_REGISTER_TYPE_D;
      this->result = op[0];
      break;
   case ir_unop_i2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      /* These conversions happen implicitly through the MOV's differing
       * source/destination register types.
       */
      emit(BRW_OPCODE_MOV, this->result, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      temp = this->result;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
	 temp.type = op[0].type;

      /* Compare against zero, then mask the all-ones result down to 0/1. */
      inst = emit(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1));
      break;

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, this->result, op[0]);
      break;
   case ir_unop_ceil:
      /* ceil(x) = -floor(-x), using the round-down instruction. */
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, this->result, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, this->result, op[0]);
      break;

   case ir_binop_min:
      if (intel->gen >= 6) {
	 /* gen6+ SEL can apply a conditional mod directly. */
	 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_L;
      } else {
	 /* Unalias the destination */
	 this->result = fs_reg(this, ir->type);

	 /* Older parts: CMP to set the flag, then a predicated SEL. */
	 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_L;

	 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
	 inst->predicated = true;
      }
      break;
   case ir_binop_max:
      if (intel->gen >= 6) {
	 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_GE;
      } else {
	 /* Unalias the destination */
	 this->result = fs_reg(this, ir->type);

	 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_G;

	 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
	 inst->predicated = true;
      }
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, this->result, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
      break;

   case ir_unop_u2f:
   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
}

/* Copy @r into @l component-by-component for an aggregate assignment,
 * recursing through arrays and structs.  @predicated makes each MOV
 * predicated on the current flag (for conditional assignments); otherwise
 * MOVs between identical registers are skipped.  Samplers occupy no
 * register storage, so they are ignored.
 */
void
fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
				   const glsl_type *type, bool predicated)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->components(); i++) {
	 l.type = brw_type_for_base_type(type);
	 r.type = brw_type_for_base_type(type);

	 if (predicated || !l.equals(&r)) {
	    fs_inst *inst = emit(BRW_OPCODE_MOV, l, r);
	    inst->predicated = predicated;
	 }

	 l.reg_offset++;
	 r.reg_offset++;
      }
      break;
   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 emit_assignment_writes(l, r, type->fields.array, predicated);
      }
      break;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 emit_assignment_writes(l, r, type->fields.structure[i].type,
				predicated);
      }
      break;

   case GLSL_TYPE_SAMPLER:
      break;

   default:
      assert(!"not reached");
      break;
   }
}

/* Emit code for an assignment: evaluate both sides, then copy RHS to LHS
 * honoring the write mask and any condition.
 */
void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   this->result = reg_undef;
   ir->lhs->accept(this);
   l = this->result;

   /* If we're doing a direct assignment, an RHS expression could
    * drop its result right into our destination.  Otherwise, tell it
    * not to.
    */
   if (ir->condition ||
       !(ir->lhs->type->is_scalar() ||
	 (ir->lhs->type->is_vector() &&
	  ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) {
      this->result = reg_undef;
   }

   ir->rhs->accept(this);
   r = this->result;

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   /* Conditional assignments set the flag register from the condition,
    * then predicate the component MOVs on it.
    */
   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   if (ir->lhs->type->is_scalar() ||
       ir->lhs->type->is_vector()) {
      /* Walk the components; only masked components consume an RHS value,
       * but the LHS offset always advances.
       */
      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
	 if (ir->write_mask & (1 << i)) {
	    if (ir->condition) {
	       inst = emit(BRW_OPCODE_MOV, l, r);
	       inst->predicated = true;
	    } else if (!l.equals(&r)) {
	       /* Skip no-op MOVs (RHS already landed in the destination). */
	       inst = emit(BRW_OPCODE_MOV, l, r);
	    }

	    r.reg_offset++;
	 }
	 l.reg_offset++;
      }
   } else {
      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
   }
}

/* Emit a gen4 texture sample of @ir into @dst, reading texcoords from
 * @coordinate.  Builds the MRF message payload (coordinates, then shadow
 * reference / bias / LOD / gradients depending on the opcode) starting at
 * m1 after the g0 header, and returns the emitted sample instruction.
 * gen4 has no SIMD8 non-shadow bias/lod message, so txb/txl fall back to
 * a SIMD16 payload whose interleaved result is untangled afterwards.
 */
fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
			      int sampler)
{
   int mlen;
   int base_mrf = 1;
   bool simd16 = false;
   fs_reg orig_dst;

   /* g0 header. */
   mlen = 1;

   if (ir->shadow_comparitor && ir->op != ir_txd) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 fs_inst *inst = emit(BRW_OPCODE_MOV,
			      fs_reg(MRF, base_mrf + mlen + i), coordinate);
	 /* GL_CLAMP on a coordinate is implemented by saturating the MOV
	  * of that coordinate component (first three components only).
	  */
	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
	    inst->saturate = true;

	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;

      if (ir->op == ir_tex) {
	 /* There's no plain shadow compare message, so we use shadow
	  * compare with a bias of 0.0.
	  */
	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
	 mlen++;
      } else if (ir->op == ir_txb) {
	 this->result = reg_undef;
	 ir->lod_info.bias->accept(this);
	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
	 mlen++;
      } else {
	 assert(ir->op == ir_txl);
	 this->result = reg_undef;
	 ir->lod_info.lod->accept(this);
	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
	 mlen++;
      }

      /* The shadow comparison reference value goes last. */
      this->result = reg_undef;
      ir->shadow_comparitor->accept(this);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
      mlen++;
   } else if (ir->op == ir_tex) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
			      coordinate);
	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
	    inst->saturate = true;
	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;
   } else if (ir->op == ir_txd) {
      this->result = reg_undef;
      ir->lod_info.grad.dPdx->accept(this);
      fs_reg dPdx = this->result;

      this->result = reg_undef;
      ir->lod_info.grad.dPdy->accept(this);
      fs_reg dPdy = this->result;

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
	 coordinate.reg_offset++;
      }
      /* the slots for u and v are always present, but r is optional */
      mlen += MAX2(ir->coordinate->type->vector_elements, 2);

      /* P = u, v, r
       * dPdx = dudx, dvdx, drdx
       * dPdy = dudy, dvdy, drdy
       *
       * 1-arg: Does not exist.
       *
       * 2-arg: dudx   dvdx   dudy   dvdy
       *        dPdx.x dPdx.y dPdy.x dPdy.y
       *        m4     m5     m6     m7
       *
       * 3-arg: dudx   dvdx   drdx   dudy   dvdy   drdy
       *        dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
       *        m5     m6     m7     m8     m9     m10
       */
      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
	 dPdx.reg_offset++;
      }
      mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);

      for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
	 dPdy.reg_offset++;
      }
      mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      assert(ir->op == ir_txb || ir->op == ir_txl);

      /* Coordinates are spread over two registers each (i * 2) in the
       * SIMD16 payload.
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF,
						     base_mrf + mlen + i * 2),
			      coordinate);
	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
	    inst->saturate = true;
	 coordinate.reg_offset++;
      }

      /* lod/bias appears after u/v/r. */
      mlen += 6;

      if (ir->op == ir_txb) {
	 this->result = reg_undef;
	 ir->lod_info.bias->accept(this);
	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
	 mlen++;
      } else {
	 this->result = reg_undef;
	 ir->lod_info.lod->accept(this);
	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
	 mlen++;
      }

      /* The unused upper half. */
      mlen++;

      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk.  We'll need to move
       * this weirdness around to the expected layout.
       */
      simd16 = true;
      orig_dst = dst;
      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
						       2));
      dst.type = BRW_REGISTER_TYPE_F;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(FS_OPCODE_TEX, dst);
      break;
   case ir_txb:
      inst = emit(FS_OPCODE_TXB, dst);
      break;
   case ir_txl:
      inst = emit(FS_OPCODE_TXL, dst);
      break;
   case ir_txd:
      inst = emit(FS_OPCODE_TXD, dst);
      break;
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = true;

   /* De-interleave the SIMD16 result back into a plain vec4. */
   if (simd16) {
      for (int i = 0; i < 4; i++) {
	 emit(BRW_OPCODE_MOV, orig_dst, dst);
	 orig_dst.reg_offset++;
	 dst.reg_offset += 2;
      }
   }

   return inst;
}

/* gen5's sampler has slots for u, v, r, array index, then optional
 * parameters like shadow comparitor or LOD bias.  If optional
 * parameters aren't present, those base slots are optional and don't
 * need to be included in the message.
 *
 * We don't fill in the unnecessary slots regardless, which may look
 * surprising in the disassembly.
 */
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
			      int sampler)
{
   int mlen = 0;
   int base_mrf = 2;
   int reg_width = c->dispatch_width / 8;
   bool header_present = false;

   if (ir->offset) {
      /* The offsets set up by the ir_texture visitor are in the
       * m1 header, so we can't go headerless.
       */
      header_present = true;
      mlen++;
      base_mrf--;
   }

   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
      fs_inst *inst = emit(BRW_OPCODE_MOV,
			   fs_reg(MRF, base_mrf + mlen + i * reg_width),
			   coordinate);
      /* GL_CLAMP on a coordinate is implemented by saturating the MOV
       * (first three components only).
       */
      if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
	 inst->saturate = true;
      coordinate.reg_offset++;
   }
   mlen += ir->coordinate->type->vector_elements * reg_width;

   if (ir->shadow_comparitor && ir->op != ir_txd) {
      /* Optional parameters start after the u/v/r/ai base slots. */
      mlen = MAX2(mlen, header_present + 4 * reg_width);

      this->result = reg_undef;
      ir->shadow_comparitor->accept(this);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
      mlen += reg_width;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(FS_OPCODE_TEX, dst);
      break;
   case ir_txb:
      this->result = reg_undef;
      ir->lod_info.bias->accept(this);
      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
      mlen += reg_width;

      inst = emit(FS_OPCODE_TXB, dst);

      break;
   case ir_txl:
      this->result = reg_undef;
      ir->lod_info.lod->accept(this);
      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
      mlen += reg_width;

      inst = emit(FS_OPCODE_TXL, dst);
      break;
   case ir_txd: {
      this->result = reg_undef;
      ir->lod_info.grad.dPdx->accept(this);
      fs_reg dPdx = this->result;

      this->result = reg_undef;
      ir->lod_info.grad.dPdy->accept(this);
      fs_reg dPdy = this->result;

      mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */

      /**
       * P = u, v, r
       * dPdx = dudx, dvdx, drdx
       * dPdy = dudy, dvdy, drdy
       *
       * Load up these values:
       * - dudx   dudy   dvdx   dvdy   drdx   drdy
       * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
       */
      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
	 dPdx.reg_offset++;
	 mlen += reg_width;

	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
	 dPdy.reg_offset++;
	 mlen += reg_width;
      }

      inst = emit(FS_OPCODE_TXD, dst);
      break;
   }
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}

/* Emit a gen7 texture sample of @ir into @dst.  Gen7 payloads are
 * headerless unless a texel offset requires the m1 header.  The payload
 * ordering differs from gen5: the shadow reference comes first, then
 * bias/LOD, then the coordinate -- except for TXD, where the coordinate
 * and the two gradients are interleaved per component.
 */
fs_inst *
fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
			      int sampler)
{
   int mlen = 0;
   int base_mrf = 2;
   int reg_width = c->dispatch_width / 8;
   bool header_present = false;

   if (ir->offset) {
      /* The offsets set up by the ir_texture visitor are in the
       * m1 header, so we can't go headerless.
       */
      header_present = true;
      mlen++;
      base_mrf--;
   }

   if (ir->shadow_comparitor && ir->op != ir_txd) {
      this->result = reg_undef;
      ir->shadow_comparitor->accept(this);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
      mlen += reg_width;
   }

   /* Set up the LOD info */
   switch (ir->op) {
   case ir_tex:
      break;
   case ir_txb:
      this->result = reg_undef;
      ir->lod_info.bias->accept(this);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
      mlen += reg_width;
      break;
   case ir_txl:
      this->result = reg_undef;
      ir->lod_info.lod->accept(this);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
      mlen += reg_width;
      break;
   case ir_txd: {
      if (c->dispatch_width == 16)
	 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");

      this->result = reg_undef;
      ir->lod_info.grad.dPdx->accept(this);
      fs_reg dPdx = this->result;

      this->result = reg_undef;
      ir->lod_info.grad.dPdy->accept(this);
      fs_reg dPdy = this->result;

      /* Load dPdx and the coordinate together:
       * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
			      coordinate);
	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
	    inst->saturate = true;
	 coordinate.reg_offset++;
	 mlen += reg_width;

	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
	 dPdx.reg_offset++;
	 mlen += reg_width;

	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
	 dPdy.reg_offset++;
	 mlen += reg_width;
      }
      break;
   }
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   /* Set up the coordinate (except for TXD where it was done earlier) */
   if (ir->op != ir_txd) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
			      coordinate);
	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
	    inst->saturate = true;
	 coordinate.reg_offset++;
	 mlen += reg_width;
      }
   }

   /* Generate the SEND */
   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex: inst = emit(FS_OPCODE_TEX, dst); break;
   case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
   case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
   case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
   case ir_txf: assert(!"TXF unsupported.");
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}

/* Emit code for a texture operation: resolves the sampler unit, handles
 * texel offsets, rectangle-texture coordinate normalization and emulated
 * shadow compares, then dispatches to the per-generation payload builder.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   fs_inst *inst = NULL;

   int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog,
&fp->Base); 955 sampler = fp->Base.SamplerUnits[sampler]; 956 957 /* Our hardware doesn't have a sample_d_c message, so shadow compares 958 * for textureGrad/TXD need to be emulated with instructions. 959 */ 960 bool hw_compare_supported = ir->op != ir_txd; 961 if (ir->shadow_comparitor && !hw_compare_supported) { 962 assert(c->key.compare_funcs[sampler] != GL_NONE); 963 /* No need to even sample for GL_ALWAYS or GL_NEVER...bail early */ 964 if (c->key.compare_funcs[sampler] == GL_ALWAYS) 965 return swizzle_result(ir, fs_reg(1.0f), sampler); 966 else if (c->key.compare_funcs[sampler] == GL_NEVER) 967 return swizzle_result(ir, fs_reg(0.0f), sampler); 968 } 969 970 this->result = reg_undef; 971 ir->coordinate->accept(this); 972 fs_reg coordinate = this->result; 973 974 if (ir->offset != NULL) { 975 ir_constant *offset = ir->offset->as_constant(); 976 assert(offset != NULL); 977 978 signed char offsets[3]; 979 for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) 980 offsets[i] = (signed char) offset->value.i[i]; 981 982 /* Combine all three offsets into a single unsigned dword: 983 * 984 * bits 11:8 - U Offset (X component) 985 * bits 7:4 - V Offset (Y component) 986 * bits 3:0 - R Offset (Z component) 987 */ 988 unsigned offset_bits = 0; 989 for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) { 990 const unsigned shift = 4 * (2 - i); 991 offset_bits |= (offsets[i] << shift) & (0xF << shift); 992 } 993 994 /* Explicitly set up the message header by copying g0 to msg reg m1. */ 995 emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), 996 fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD)); 997 998 /* Then set the offset bits in DWord 2 of the message header. 
*/ 999 emit(BRW_OPCODE_MOV, 1000 fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), 1001 BRW_REGISTER_TYPE_UD)), 1002 fs_reg(brw_imm_uw(offset_bits))); 1003 } 1004 1005 /* Should be lowered by do_lower_texture_projection */ 1006 assert(!ir->projector); 1007 1008 /* The 965 requires the EU to do the normalization of GL rectangle 1009 * texture coordinates. We use the program parameter state 1010 * tracking to get the scaling factor. 1011 */ 1012 if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { 1013 struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; 1014 int tokens[STATE_LENGTH] = { 1015 STATE_INTERNAL, 1016 STATE_TEXRECT_SCALE, 1017 sampler, 1018 0, 1019 0 1020 }; 1021 1022 if (c->dispatch_width == 16) { 1023 fail("rectangle scale uniform setup not supported on 16-wide\n"); 1024 this->result = fs_reg(this, ir->type); 1025 return; 1026 } 1027 1028 c->prog_data.param_convert[c->prog_data.nr_params] = 1029 PARAM_NO_CONVERT; 1030 c->prog_data.param_convert[c->prog_data.nr_params + 1] = 1031 PARAM_NO_CONVERT; 1032 1033 fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); 1034 fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); 1035 GLuint index = _mesa_add_state_reference(params, 1036 (gl_state_index *)tokens); 1037 1038 this->param_index[c->prog_data.nr_params] = index; 1039 this->param_offset[c->prog_data.nr_params] = 0; 1040 c->prog_data.nr_params++; 1041 this->param_index[c->prog_data.nr_params] = index; 1042 this->param_offset[c->prog_data.nr_params] = 1; 1043 c->prog_data.nr_params++; 1044 1045 fs_reg dst = fs_reg(this, ir->coordinate->type); 1046 fs_reg src = coordinate; 1047 coordinate = dst; 1048 1049 emit(BRW_OPCODE_MUL, dst, src, scale_x); 1050 dst.reg_offset++; 1051 src.reg_offset++; 1052 emit(BRW_OPCODE_MUL, dst, src, scale_y); 1053 } 1054 1055 /* Writemasking doesn't eliminate channels on SIMD8 texture 1056 * samples, so don't worry about them. 
 */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   /* Emit the generation-specific sample message for this texture lookup. */
   if (intel->gen >= 7) {
      inst = emit_texture_gen7(ir, dst, coordinate, sampler);
   } else if (intel->gen >= 5) {
      inst = emit_texture_gen5(ir, dst, coordinate, sampler);
   } else {
      inst = emit_texture_gen4(ir, dst, coordinate, sampler);
   }

   /* If there's an offset, we already set up m1.  To avoid the implied move,
    * use the null register.  Otherwise, we want an implied move from g0.
    */
   if (ir->offset != NULL || !inst->header_present)
      inst->src[0] = reg_undef;
   else
      inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));

   inst->sampler = sampler;

   if (ir->shadow_comparitor) {
      if (hw_compare_supported) {
         inst->shadow_compare = true;
      } else {
         /* The hardware can't do the shadow comparison for us, so evaluate
          * the reference value and emit the compare in the shader instead.
          */
         this->result = reg_undef;
         ir->shadow_comparitor->accept(this);
         fs_reg ref = this->result;

         fs_reg value = dst;
         dst = fs_reg(this, glsl_type::vec4_type);

         /* FINISHME: This needs to be done pre-filtering. */

         uint32_t conditional = 0;
         switch (c->key.compare_funcs[sampler]) {
         /* GL_ALWAYS and GL_NEVER were handled at the top of the function */
         case GL_LESS: conditional = BRW_CONDITIONAL_L; break;
         case GL_GREATER: conditional = BRW_CONDITIONAL_G; break;
         case GL_LEQUAL: conditional = BRW_CONDITIONAL_LE; break;
         case GL_GEQUAL: conditional = BRW_CONDITIONAL_GE; break;
         case GL_EQUAL: conditional = BRW_CONDITIONAL_EQ; break;
         case GL_NOTEQUAL: conditional = BRW_CONDITIONAL_NEQ; break;
         default: assert(!"Should not get here: bad shadow compare function");
         }

         /* Use conditional moves to load 0 or 1 as the result */
         this->current_annotation = "manual shadow comparison";
         for (int i = 0; i < 4; i++) {
            /* Default each channel to 0.0, then predicate a MOV of 1.0 on the
             * outcome of the compare against the sampled value.
             */
            inst = emit(BRW_OPCODE_MOV, dst, fs_reg(0.0f));

            inst = emit(BRW_OPCODE_CMP, reg_null_f, ref, value);
            inst->conditional_mod = conditional;

            inst = emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f));
            inst->predicated = true;

            dst.reg_offset++;
            value.reg_offset++;
         }
         dst.reg_offset = 0;
      }
   }

   swizzle_result(ir, dst, sampler);
}

/**
 * Swizzle the result of a texture lookup.  This is necessary for
 * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
 *
 * Leaves this->result pointing at either the original value (when no
 * swizzling applies) or a freshly swizzled vec4 temporary.
 */
void
fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler)
{
   this->result = orig_val;

   if (ir->type == glsl_type::float_type) {
      /* Ignore DEPTH_TEXTURE_MODE swizzling. */
      assert(ir->sampler->type->sampler_shadow);
   } else if (c->key.tex_swizzles[sampler] != SWIZZLE_NOOP) {
      fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type);

      /* Build each output channel: constants for SWIZZLE_ZERO/SWIZZLE_ONE,
       * otherwise a MOV from the selected source channel.
       */
      for (int i = 0; i < 4; i++) {
         int swiz = GET_SWZ(c->key.tex_swizzles[sampler], i);
         fs_reg l = swizzled_result;
         l.reg_offset += i;

         if (swiz == SWIZZLE_ZERO) {
            emit(BRW_OPCODE_MOV, l, fs_reg(0.0f));
         } else if (swiz == SWIZZLE_ONE) {
            emit(BRW_OPCODE_MOV, l, fs_reg(1.0f));
         } else {
            fs_reg r = orig_val;
            r.reg_offset += GET_SWZ(c->key.tex_swizzles[sampler], i);
            emit(BRW_OPCODE_MOV, l, r);
         }
      }
      this->result = swizzled_result;
   }
}

/**
 * Emit MOVs gathering the swizzled channels of the value into a new
 * register.  A single-component swizzle of a scalar-usable value is handled
 * by just offsetting this->result instead of emitting any code.
 */
void
fs_visitor::visit(ir_swizzle *ir)
{
   this->result = reg_undef;
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(BRW_OPCODE_MOV, result, channel);
      result.reg_offset++;
   }
}

/** Emit the discard opcode.  Conditional discard is not handled yet. */
void
fs_visitor::visit(ir_discard *ir)
{
   assert(ir->condition == NULL); /* FINISHME */

   emit(FS_OPCODE_DISCARD);
   /* Remember that a kill was emitted; emit_fb_writes() uses this to decide
    * whether the FB write needs a header.
    */
   kill_emitted = true;
}

/**
 * Materialize a constant (scalar, vector, array, or record) into a new
 * register by emitting MOVs of the component values.
 */
void
fs_visitor::visit(ir_constant *ir)
{
   /* Set this->result to reg at the bottom of the function because some code
    * paths will cause this visitor to be applied to other fields.  This will
    * cause the value stored in this->result to be modified.
    *
    * Make reg constant so that it doesn't get accidentally modified along the
    * way.  Yes, I actually had this problem. :(
    */
   const fs_reg reg(this, ir->type);
   fs_reg dst_reg = reg;

   if (ir->type->is_array()) {
      const unsigned size = type_size(ir->type->fields.array);

      /* Visit each array element and copy its components into place. */
      for (unsigned i = 0; i < ir->type->length; i++) {
         this->result = reg_undef;
         ir->array_elements[i]->accept(this);
         fs_reg src_reg = this->result;

         dst_reg.type = src_reg.type;
         for (unsigned j = 0; j < size; j++) {
            emit(BRW_OPCODE_MOV, dst_reg, src_reg);
            src_reg.reg_offset++;
            dst_reg.reg_offset++;
         }
      }
   } else if (ir->type->is_record()) {
      /* Same as the array case, but iterating the record's component list. */
      foreach_list(node, &ir->components) {
         ir_instruction *const field = (ir_instruction *) node;
         const unsigned size = type_size(field->type);

         this->result = reg_undef;
         field->accept(this);
         fs_reg src_reg = this->result;

         dst_reg.type = src_reg.type;
         for (unsigned j = 0; j < size; j++) {
            emit(BRW_OPCODE_MOV, dst_reg, src_reg);
            src_reg.reg_offset++;
            dst_reg.reg_offset++;
         }
      }
   } else {
      /* Scalar/vector: load each component as an immediate. */
      const unsigned size = type_size(ir->type);

      for (unsigned i = 0; i < size; i++) {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]));
            break;
         case GLSL_TYPE_UINT:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]));
            break;
         case GLSL_TYPE_INT:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]));
            break;
         case GLSL_TYPE_BOOL:
            emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]));
            break;
         default:
            assert(!"Non-float/uint/int/bool constant");
         }
         dst_reg.reg_offset++;
      }
   }

   this->result = reg;
}

/**
 * Evaluate a boolean r-value and set the hardware flag register from it.
 *
 * When the condition is a recognized scalar expression, the comparison is
 * folded directly into a flag-setting instruction (conditional_mod on a
 * null destination) instead of first materializing a boolean value.
 */
void
fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         this->result = reg_undef;
         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* AND with 1 so only the boolean bit participates; Z flag means
          * the operand was false.
          */
         inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(BRW_OPCODE_OR, reg_null_d, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         /* gen6+ compares against 0.0f; older gens just MOV to a null float
          * destination, which also sets the flag from the source value.
          */
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f));
         } else {
            inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0));
         } else {
            inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      default:
         assert(!"not reached");
         fail("bad cond code\n");
         break;
      }
      return;
   }

   /* Not a foldable expression: evaluate the value and set NZ from it. */
   this->result = reg_undef;
   ir->accept(this);

   if (intel->gen >= 6) {
      fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      fs_inst *inst = emit(BRW_OPCODE_MOV, reg_null_d, this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}

/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
fs_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;
      fs_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         this->result = reg_undef;
         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* NOTE(review): this is the only case that uses `temp` (still
          * default-constructed here) as the IF destination; the sibling
          * cases use reg_null_d.  Presumably a null destination was
          * intended — confirm.
          */
         inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         return;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_or:
         /* OR the operands into a temporary, then IF on it being nonzero. */
         temp = fs_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_OR, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_and:
         temp = fs_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_AND, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_f2b:
         inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_i2b:
         inst = emit(BRW_OPCODE_IF,
                     reg_null_d, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         /* Fold the comparison directly into the IF's conditional_mod. */
         inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         return;
      default:
         assert(!"not reached");
         inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         fail("bad condition\n");
         return;
      }
      return;
   }

   /* Not a foldable expression: evaluate the condition, then IF on it
    * being nonzero.
    */
   this->result = reg_undef;
   ir->condition->accept(this);

   fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}

/**
 * Emit the IF/ELSE/ENDIF structure for an ir_if, visiting both branches.
 *
 * On gen6 the condition is folded into the IF instruction; on other gens
 * the flag is set first and the IF is predicated on it.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   if (intel->gen != 6 && c->dispatch_width == 16) {
      fail("Can't support (non-uniform) control flow on 16-wide\n");
   }

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);

      inst = emit(BRW_OPCODE_IF);
      inst->predicated = true;
   }

   foreach_list(node, &ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)node;
      this->base_ir = ir;
      this->result = reg_undef;
      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(BRW_OPCODE_ELSE);

      foreach_list(node, &ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)node;
         this->base_ir = ir;
         this->result = reg_undef;
         ir->accept(this);
      }
   }

   emit(BRW_OPCODE_ENDIF);
}

/**
 * Emit a DO...WHILE loop, including the optional counter initialization,
 * bound check (as a predicated BREAK at the loop top), and increment.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (c->dispatch_width == 16) {
      fail("Can't support (non-uniform) control flow on 16-wide\n");
   }

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->result = counter;

         /* NOTE(review): this->result = counter is assigned twice in a row
          * here; the second assignment below looks redundant — confirm.
          */
         this->base_ir = ir->from;
         this->result = counter;
         ir->from->accept(this);

         /* Only copy the initial value if it didn't land in the counter's
          * storage already.
          */
         if (!this->result.equals(&counter))
            emit(BRW_OPCODE_MOV, counter, this->result);
      }
   }

   emit(BRW_OPCODE_DO);

   if (ir->to) {
      this->base_ir = ir->to;
      this->result = reg_undef;
      ir->to->accept(this);

      /* Compare the counter to the bound and break out when the loop
       * condition is met.
       */
      fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result);
      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);

      inst = emit(BRW_OPCODE_BREAK);
      inst->predicated = true;
   }

   foreach_list(node, &ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)node;

      this->base_ir = ir;
      this->result = reg_undef;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      this->result = reg_undef;
      ir->increment->accept(this);
      emit(BRW_OPCODE_ADD, counter, counter, this->result);
   }

   emit(BRW_OPCODE_WHILE);
}

/** Emit the BREAK or CONTINUE instruction for a loop jump. */
void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

/** Visit the body of main(); other functions are expected to be inlined. */
void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_list(node, &sig->body) {
         ir_instruction *ir = (ir_instruction *)node;
         this->base_ir = ir;
         this->result = reg_undef;
         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

/**
 * Copy the given instruction into a freshly allocated one, tag it with the
 * current annotation/IR and any compression-override state, and append it
 * to the instruction stream.  Returns the appended instruction.
 */
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   /* Apply the compression overrides pushed by
    * push_force_uncompressed()/push_force_sechalf().
    */
   if (force_uncompressed_stack > 0)
      list_inst->force_uncompressed = true;
   else if (force_sechalf_stack > 0)
      list_inst->force_sechalf = true;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color. */
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 2), fs_reg(1.0f));
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 3), fs_reg(0.0f));
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 4), fs_reg(1.0f));
   emit(BRW_OPCODE_MOV, fs_reg(MRF, 5), fs_reg(0.0f));

   fs_inst *write;
   write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
   write->base_mrf = 2;
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   /* Two channels are packed per register; odd channels start at a
    * 4-float suboffset within the register.
    */
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;

   emit(FS_OPCODE_PIXEL_X, this->pixel_x);
   emit(FS_OPCODE_PIXEL_Y, this->pixel_y);

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      /* With PLN, delta_x/delta_y must live in consecutive registers, so
       * allocate a vec2 and alias delta_y to its second component.
       */
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(BRW_OPCODE_ADD, this->delta_x,
        this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))));
   emit(BRW_OPCODE_ADD, this->delta_y,
        this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.
      It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
        interp_reg(FRAG_ATTRIB_WPOS, 3));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* Add the per-pixel offsets (immediate vectors) to the subspan base
    * coordinates read from g1.
    */
   emit(BRW_OPCODE_ADD,
        int_pixel_x,
        fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
        fs_reg(brw_imm_v(0x10101010)));
   emit(BRW_OPCODE_ADD,
        int_pixel_y,
        fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
        fs_reg(brw_imm_v(0x11001100)));

   /* As of gen6, we can no longer mix float and int sources.  We have
    * to turn the integer pixel centers into floats for their actual
    * use.
    */
   this->pixel_x = fs_reg(this, glsl_type::float_type);
   this->pixel_y = fs_reg(this, glsl_type::float_type);
   emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x);
   emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y);

   this->current_annotation = "compute pos.w";
   /* On gen6 the W coordinate arrives in the payload; just compute 1/W. */
   this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);

   /* The barycentric deltas are delivered in g2/g3 of the payload. */
   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
   this->delta_y = fs_reg(brw_vec8_grf(3, 0));

   this->current_annotation = NULL;
}

/**
 * Move one color channel into the message registers for an FB write,
 * handling the per-generation SIMD16 message layouts.
 *
 * @param index            channel index (0..3 for RGBA)
 * @param first_color_mrf  first MRF reserved for color data
 * @param color            the channel's source register
 */
void
fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
{
   int reg_width = c->dispatch_width / 8;
   fs_inst *inst;

   if (c->dispatch_width == 8 || intel->gen == 6) {
      /* SIMD8 write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       *
       * gen6 SIMD16 DP write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       * m + 4: b0
       * m + 5: b1
       * m + 6: a0
       * m + 7: a1
       */
      inst = emit(BRW_OPCODE_MOV,
                  fs_reg(MRF, first_color_mrf + index * reg_width),
                  color);
      inst->saturate = c->key.clamp_fragment_color;
   } else {
      /* pre-gen6 SIMD16 single source DP write looks like:
       * m + 0: r0
       * m + 1: g0
       * m + 2: b0
       * m + 3: a0
       * m + 4: r1
       * m + 5: g1
       * m + 6: b1
       * m + 7: a1
       */
      if (brw->has_compr4) {
         /* By setting the high bit of the MRF register number, we
          * indicate that we want COMPR4 mode - instead of doing the
          * usual destination + 1 for the second half we get
          * destination + 4.
          */
         inst = emit(BRW_OPCODE_MOV,
                     fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
      } else {
         /* No COMPR4: emit the two SIMD8 halves separately, forcing the
          * first uncompressed and the second to the second half.
          */
         push_force_uncompressed();
         inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_uncompressed();

         push_force_sechalf();
         color.sechalf = true;
         inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4),
                     color);
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_sechalf();
         color.sechalf = false;
      }
   }
}

/**
 * Emit the framebuffer-write messages that end the shader: assemble the
 * optional header, AA stencil, color, and depth payload in the MRFs, then
 * emit one FS_OPCODE_FB_WRITE per color region (the last one with EOT).
 */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   GLboolean header_present = GL_TRUE;
   int base_mrf = 2;
   int nr = base_mrf;
   int reg_width = c->dispatch_width / 8;

   /* The header can be skipped on gen6+ when there's a single render
    * target and no discard was emitted.
    */
   if (intel->gen >= 6 &&
       !this->kill_emitted &&
       c->key.nr_color_regions == 1) {
      header_present = false;
   }

   if (header_present) {
      /* m2, m3 header */
      nr += 2;
   }

   if (c->aa_dest_stencil_reg) {
      push_force_uncompressed();
      emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
           fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)));
      pop_force_uncompressed();
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4 * reg_width;

   if (c->source_depth_to_render_target) {
      if (intel->gen == 6 && c->dispatch_width == 16) {
         /* For outputting oDepth on gen6, SIMD8 writes have to be
          * used.  This would require 8-wide moves of each half to
          * message regs, kind of like pre-gen5 SIMD16 FB writes.
          * Just bail on doing so for now.
          */
         fail("Missing support for simd16 depth writes on gen6\n");
      }

      if (c->computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth);
      } else {
         /* Pass through the payload depth. */
         emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
              fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
      }
      nr += reg_width;
   }

   if (c->dest_depth_reg) {
      emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
           fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)));
      nr += reg_width;
   }

   /* Locate the color source: gl_FragColor, or gl_FragData reinterpreted
    * as float.
    */
   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data) {
      color = *(variable_storage(this->frag_data));
      color.type = BRW_REGISTER_TYPE_F;
   }

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = ralloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      if (this->frag_color || this->frag_data) {
         for (int i = 0; i < 4; i++) {
            emit_color_write(i, color_mrf, color);
            color.reg_offset++;
         }
      }

      /* gl_FragColor is broadcast to every target, so rewind to its start;
       * gl_FragData advances to the next element per target.
       * NOTE(review): the rewind is frag_color-only by design here —
       * confirm frag_data's per-target advance matches its storage layout.
       */
      if (this->frag_color)
         color.reg_offset -= 4;

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->target = target;
      inst->base_mrf = base_mrf;
      inst->mlen = nr - base_mrf;
      /* Only the last FB write in the shader carries end-of-thread. */
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
      inst->header_present = header_present;
   }

   if (c->key.nr_color_regions == 0) {
      if (c->key.alpha_test && (this->frag_color || this->frag_data)) {
         /* If the alpha test is enabled but there's no color buffer,
          * we still need to send alpha out the pipeline to our null
          * renderbuffer.
          */
         color.reg_offset += 3;
         emit_color_write(3, color_mrf, color);
      }

      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = nr - base_mrf;
      inst->eot = true;
      inst->header_present = header_present;
   }

   this->current_annotation = NULL;
}