/* brw_fs.h revision 458f7f014139deb48a4cf0a9e6bdca3a57d24208 */
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28#pragma once 29 30#include "brw_shader.h" 31 32extern "C" { 33 34#include <sys/types.h> 35 36#include "main/macros.h" 37#include "main/shaderobj.h" 38#include "main/uniforms.h" 39#include "program/prog_parameter.h" 40#include "program/prog_print.h" 41#include "program/prog_optimize.h" 42#include "program/register_allocate.h" 43#include "program/sampler.h" 44#include "program/hash_table.h" 45#include "brw_context.h" 46#include "brw_eu.h" 47#include "brw_wm.h" 48} 49#include "glsl/glsl_types.h" 50#include "glsl/ir.h" 51 52class fs_bblock; 53namespace { 54 class acp_entry; 55} 56 57enum register_file { 58 BAD_FILE, 59 ARF, 60 GRF, 61 MRF, 62 IMM, 63 FIXED_HW_REG, /* a struct brw_reg */ 64 UNIFORM, /* prog_data->params[reg] */ 65}; 66 67class fs_reg { 68public: 69 /* Callers of this ralloc-based new need not call delete. It's 70 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 71 static void* operator new(size_t size, void *ctx) 72 { 73 void *node; 74 75 node = ralloc_size(ctx, size); 76 assert(node != NULL); 77 78 return node; 79 } 80 81 void init() 82 { 83 memset(this, 0, sizeof(*this)); 84 this->smear = -1; 85 } 86 87 /** Generic unset register constructor. */ 88 fs_reg() 89 { 90 init(); 91 this->file = BAD_FILE; 92 } 93 94 /** Immediate value constructor. */ 95 fs_reg(float f) 96 { 97 init(); 98 this->file = IMM; 99 this->type = BRW_REGISTER_TYPE_F; 100 this->imm.f = f; 101 } 102 103 /** Immediate value constructor. */ 104 fs_reg(int32_t i) 105 { 106 init(); 107 this->file = IMM; 108 this->type = BRW_REGISTER_TYPE_D; 109 this->imm.i = i; 110 } 111 112 /** Immediate value constructor. */ 113 fs_reg(uint32_t u) 114 { 115 init(); 116 this->file = IMM; 117 this->type = BRW_REGISTER_TYPE_UD; 118 this->imm.u = u; 119 } 120 121 /** Fixed brw_reg Immediate value constructor. 
*/ 122 fs_reg(struct brw_reg fixed_hw_reg) 123 { 124 init(); 125 this->file = FIXED_HW_REG; 126 this->fixed_hw_reg = fixed_hw_reg; 127 this->type = fixed_hw_reg.type; 128 } 129 130 fs_reg(enum register_file file, int reg); 131 fs_reg(enum register_file file, int reg, uint32_t type); 132 fs_reg(class fs_visitor *v, const struct glsl_type *type); 133 134 bool equals(const fs_reg &r) const 135 { 136 return (file == r.file && 137 reg == r.reg && 138 reg_offset == r.reg_offset && 139 type == r.type && 140 negate == r.negate && 141 abs == r.abs && 142 memcmp(&fixed_hw_reg, &r.fixed_hw_reg, 143 sizeof(fixed_hw_reg)) == 0 && 144 smear == r.smear && 145 imm.u == r.imm.u); 146 } 147 148 /** Register file: ARF, GRF, MRF, IMM. */ 149 enum register_file file; 150 /** 151 * Register number. For ARF/MRF, it's the hardware register. For 152 * GRF, it's a virtual register number until register allocation 153 */ 154 int reg; 155 /** 156 * For virtual registers, this is a hardware register offset from 157 * the start of the register block (for example, a constant index 158 * in an array access). 159 */ 160 int reg_offset; 161 /** Register type. BRW_REGISTER_TYPE_* */ 162 int type; 163 bool negate; 164 bool abs; 165 bool sechalf; 166 struct brw_reg fixed_hw_reg; 167 int smear; /* -1, or a channel of the reg to smear to all channels. */ 168 169 /** Value for file == IMM */ 170 union { 171 int32_t i; 172 uint32_t u; 173 float f; 174 } imm; 175}; 176 177static const fs_reg reg_undef; 178static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); 179static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); 180 181class fs_inst : public exec_node { 182public: 183 /* Callers of this ralloc-based new need not call delete. It's 184 * easier to just ralloc_free 'ctx' (or any of its ancestors). 
*/ 185 static void* operator new(size_t size, void *ctx) 186 { 187 void *node; 188 189 node = rzalloc_size(ctx, size); 190 assert(node != NULL); 191 192 return node; 193 } 194 195 void init() 196 { 197 memset(this, 0, sizeof(*this)); 198 this->opcode = BRW_OPCODE_NOP; 199 this->conditional_mod = BRW_CONDITIONAL_NONE; 200 201 this->dst = reg_undef; 202 this->src[0] = reg_undef; 203 this->src[1] = reg_undef; 204 this->src[2] = reg_undef; 205 } 206 207 fs_inst() 208 { 209 init(); 210 } 211 212 fs_inst(enum opcode opcode) 213 { 214 init(); 215 this->opcode = opcode; 216 } 217 218 fs_inst(enum opcode opcode, fs_reg dst) 219 { 220 init(); 221 this->opcode = opcode; 222 this->dst = dst; 223 224 if (dst.file == GRF) 225 assert(dst.reg_offset >= 0); 226 } 227 228 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) 229 { 230 init(); 231 this->opcode = opcode; 232 this->dst = dst; 233 this->src[0] = src0; 234 235 if (dst.file == GRF) 236 assert(dst.reg_offset >= 0); 237 if (src[0].file == GRF) 238 assert(src[0].reg_offset >= 0); 239 } 240 241 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 242 { 243 init(); 244 this->opcode = opcode; 245 this->dst = dst; 246 this->src[0] = src0; 247 this->src[1] = src1; 248 249 if (dst.file == GRF) 250 assert(dst.reg_offset >= 0); 251 if (src[0].file == GRF) 252 assert(src[0].reg_offset >= 0); 253 if (src[1].file == GRF) 254 assert(src[1].reg_offset >= 0); 255 } 256 257 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 258 { 259 init(); 260 this->opcode = opcode; 261 this->dst = dst; 262 this->src[0] = src0; 263 this->src[1] = src1; 264 this->src[2] = src2; 265 266 if (dst.file == GRF) 267 assert(dst.reg_offset >= 0); 268 if (src[0].file == GRF) 269 assert(src[0].reg_offset >= 0); 270 if (src[1].file == GRF) 271 assert(src[1].reg_offset >= 0); 272 if (src[2].file == GRF) 273 assert(src[2].reg_offset >= 0); 274 } 275 276 bool equals(fs_inst *inst) 277 { 278 return (opcode == inst->opcode && 279 
dst.equals(inst->dst) && 280 src[0].equals(inst->src[0]) && 281 src[1].equals(inst->src[1]) && 282 src[2].equals(inst->src[2]) && 283 saturate == inst->saturate && 284 predicated == inst->predicated && 285 conditional_mod == inst->conditional_mod && 286 mlen == inst->mlen && 287 base_mrf == inst->base_mrf && 288 sampler == inst->sampler && 289 target == inst->target && 290 eot == inst->eot && 291 header_present == inst->header_present && 292 shadow_compare == inst->shadow_compare && 293 offset == inst->offset); 294 } 295 296 int regs_written() 297 { 298 if (is_tex()) 299 return 4; 300 301 /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2, 302 * but we don't currently use them...nor do we have an opcode for them. 303 */ 304 305 return 1; 306 } 307 308 bool is_tex() 309 { 310 return (opcode == SHADER_OPCODE_TEX || 311 opcode == FS_OPCODE_TXB || 312 opcode == SHADER_OPCODE_TXD || 313 opcode == SHADER_OPCODE_TXF || 314 opcode == SHADER_OPCODE_TXL || 315 opcode == SHADER_OPCODE_TXS); 316 } 317 318 bool is_math() 319 { 320 return (opcode == SHADER_OPCODE_RCP || 321 opcode == SHADER_OPCODE_RSQ || 322 opcode == SHADER_OPCODE_SQRT || 323 opcode == SHADER_OPCODE_EXP2 || 324 opcode == SHADER_OPCODE_LOG2 || 325 opcode == SHADER_OPCODE_SIN || 326 opcode == SHADER_OPCODE_COS || 327 opcode == SHADER_OPCODE_INT_QUOTIENT || 328 opcode == SHADER_OPCODE_INT_REMAINDER || 329 opcode == SHADER_OPCODE_POW); 330 } 331 332 enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 333 fs_reg dst; 334 fs_reg src[3]; 335 bool saturate; 336 bool predicated; 337 bool predicate_inverse; 338 int conditional_mod; /**< BRW_CONDITIONAL_* */ 339 340 int mlen; /**< SEND message length */ 341 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ 342 int sampler; 343 int target; /**< MRT target. 
*/ 344 bool eot; 345 bool header_present; 346 bool shadow_compare; 347 bool force_uncompressed; 348 bool force_sechalf; 349 uint32_t offset; /* spill/unspill offset */ 350 351 /** @{ 352 * Annotation for the generated IR. One of the two can be set. 353 */ 354 ir_instruction *ir; 355 const char *annotation; 356 /** @} */ 357}; 358 359class fs_visitor : public ir_visitor 360{ 361public: 362 363 fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, 364 struct brw_shader *shader) 365 { 366 this->c = c; 367 this->p = &c->func; 368 this->brw = p->brw; 369 this->fp = (struct gl_fragment_program *) 370 prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; 371 this->prog = prog; 372 this->intel = &brw->intel; 373 this->ctx = &intel->ctx; 374 this->mem_ctx = ralloc_context(NULL); 375 this->shader = shader; 376 this->failed = false; 377 this->variable_ht = hash_table_ctor(0, 378 hash_table_pointer_hash, 379 hash_table_pointer_compare); 380 381 /* There's a question that appears to be left open in the spec: 382 * How do implicit dst conversions interact with the CMP 383 * instruction or conditional mods? On gen6, the instruction: 384 * 385 * CMP null<d> src0<f> src1<f> 386 * 387 * will do src1 - src0 and compare that result as if it was an 388 * integer. On gen4, it will do src1 - src0 as float, convert 389 * the result to int, and compare as int. In between, it 390 * appears that it does src1 - src0 and does the compare in the 391 * execution type so dst type doesn't matter. 392 */ 393 if (this->intel->gen > 4) 394 this->reg_null_cmp = reg_null_d; 395 else 396 this->reg_null_cmp = reg_null_f; 397 398 this->frag_depth = NULL; 399 memset(this->outputs, 0, sizeof(this->outputs)); 400 this->first_non_payload_grf = 0; 401 this->max_grf = intel->gen >= 7 ? 
GEN7_MRF_HACK_START : BRW_MAX_GRF; 402 403 this->current_annotation = NULL; 404 this->base_ir = NULL; 405 406 this->virtual_grf_sizes = NULL; 407 this->virtual_grf_next = 0; 408 this->virtual_grf_array_size = 0; 409 this->virtual_grf_def = NULL; 410 this->virtual_grf_use = NULL; 411 this->live_intervals_valid = false; 412 413 this->kill_emitted = false; 414 this->force_uncompressed_stack = 0; 415 this->force_sechalf_stack = 0; 416 } 417 418 ~fs_visitor() 419 { 420 ralloc_free(this->mem_ctx); 421 hash_table_dtor(this->variable_ht); 422 } 423 424 fs_reg *variable_storage(ir_variable *var); 425 int virtual_grf_alloc(int size); 426 void import_uniforms(fs_visitor *v); 427 428 void visit(ir_variable *ir); 429 void visit(ir_assignment *ir); 430 void visit(ir_dereference_variable *ir); 431 void visit(ir_dereference_record *ir); 432 void visit(ir_dereference_array *ir); 433 void visit(ir_expression *ir); 434 void visit(ir_texture *ir); 435 void visit(ir_if *ir); 436 void visit(ir_constant *ir); 437 void visit(ir_swizzle *ir); 438 void visit(ir_return *ir); 439 void visit(ir_loop *ir); 440 void visit(ir_loop_jump *ir); 441 void visit(ir_discard *ir); 442 void visit(ir_call *ir); 443 void visit(ir_function *ir); 444 void visit(ir_function_signature *ir); 445 446 void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler); 447 448 fs_inst *emit(fs_inst inst); 449 450 fs_inst *emit(enum opcode opcode) 451 { 452 return emit(fs_inst(opcode)); 453 } 454 455 fs_inst *emit(enum opcode opcode, fs_reg dst) 456 { 457 return emit(fs_inst(opcode, dst)); 458 } 459 460 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0) 461 { 462 return emit(fs_inst(opcode, dst, src0)); 463 } 464 465 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 466 { 467 return emit(fs_inst(opcode, dst, src0, src1)); 468 } 469 470 fs_inst *emit(enum opcode opcode, fs_reg dst, 471 fs_reg src0, fs_reg src1, fs_reg src2) 472 { 473 return emit(fs_inst(opcode, dst, src0, src1, src2)); 
474 } 475 476 int type_size(const struct glsl_type *type); 477 fs_inst *get_instruction_generating_reg(fs_inst *start, 478 fs_inst *end, 479 fs_reg reg); 480 481 bool run(); 482 void setup_paramvalues_refs(); 483 void assign_curb_setup(); 484 void calculate_urb_setup(); 485 void assign_urb_setup(); 486 bool assign_regs(); 487 void assign_regs_trivial(); 488 int choose_spill_reg(struct ra_graph *g); 489 void spill_reg(int spill_reg); 490 void split_virtual_grfs(); 491 void setup_pull_constants(); 492 void calculate_live_intervals(); 493 bool propagate_constants(); 494 bool opt_algebraic(); 495 bool opt_cse(); 496 bool opt_cse_local(fs_bblock *block, exec_list *aeb); 497 bool opt_copy_propagate(); 498 bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); 499 bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block, 500 exec_list *acp); 501 bool register_coalesce(); 502 bool register_coalesce_2(); 503 bool compute_to_mrf(); 504 bool dead_code_eliminate(); 505 bool remove_dead_constants(); 506 bool remove_duplicate_mrf_writes(); 507 bool virtual_grf_interferes(int a, int b); 508 void schedule_instructions(); 509 void fail(const char *msg, ...); 510 511 void push_force_uncompressed(); 512 void pop_force_uncompressed(); 513 void push_force_sechalf(); 514 void pop_force_sechalf(); 515 516 void generate_code(); 517 void generate_fb_write(fs_inst *inst); 518 void generate_pixel_xy(struct brw_reg dst, bool is_x); 519 void generate_linterp(fs_inst *inst, struct brw_reg dst, 520 struct brw_reg *src); 521 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 522 void generate_math1_gen7(fs_inst *inst, 523 struct brw_reg dst, 524 struct brw_reg src); 525 void generate_math2_gen7(fs_inst *inst, 526 struct brw_reg dst, 527 struct brw_reg src0, 528 struct brw_reg src1); 529 void generate_math1_gen6(fs_inst *inst, 530 struct brw_reg dst, 531 struct brw_reg src); 532 void generate_math2_gen6(fs_inst *inst, 533 struct brw_reg dst, 534 struct 
brw_reg src0, 535 struct brw_reg src1); 536 void generate_math_gen4(fs_inst *inst, 537 struct brw_reg dst, 538 struct brw_reg src); 539 void generate_discard(fs_inst *inst); 540 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 541 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src, 542 bool negate_value); 543 void generate_spill(fs_inst *inst, struct brw_reg src); 544 void generate_unspill(fs_inst *inst, struct brw_reg dst); 545 void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst); 546 void generate_mov_dispatch_to_flags(); 547 548 void emit_dummy_fs(); 549 fs_reg *emit_fragcoord_interpolation(ir_variable *ir); 550 fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp, 551 glsl_interp_qualifier interpolation_mode, 552 bool is_centroid); 553 fs_reg *emit_frontfacing_interpolation(ir_variable *ir); 554 fs_reg *emit_general_interpolation(ir_variable *ir); 555 void emit_interpolation_setup_gen4(); 556 void emit_interpolation_setup_gen6(); 557 fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 558 int sampler); 559 fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 560 int sampler); 561 fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 562 int sampler); 563 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); 564 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); 565 bool try_emit_saturate(ir_expression *ir); 566 bool try_emit_mad(ir_expression *ir, int mul_arg); 567 void emit_bool_to_cond_code(ir_rvalue *condition); 568 void emit_if_gen6(ir_if *ir); 569 void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); 570 571 void emit_color_write(int target, int index, int first_color_mrf); 572 void emit_fb_writes(); 573 bool try_rewrite_rhs_to_dst(ir_assignment *ir, 574 fs_reg dst, 575 fs_reg src, 576 fs_inst *pre_rhs_inst, 577 fs_inst *last_rhs_inst); 578 void emit_assignment_writes(fs_reg &l, 
fs_reg &r, 579 const glsl_type *type, bool predicated); 580 void resolve_ud_negate(fs_reg *reg); 581 void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg); 582 583 struct brw_reg interp_reg(int location, int channel); 584 int setup_uniform_values(int loc, const glsl_type *type); 585 void setup_builtin_uniform_values(ir_variable *ir); 586 int implied_mrf_writes(fs_inst *inst); 587 588 struct brw_context *brw; 589 const struct gl_fragment_program *fp; 590 struct intel_context *intel; 591 struct gl_context *ctx; 592 struct brw_wm_compile *c; 593 struct brw_compile *p; 594 struct brw_shader *shader; 595 struct gl_shader_program *prog; 596 void *mem_ctx; 597 exec_list instructions; 598 599 /* Delayed setup of c->prog_data.params[] due to realloc of 600 * ParamValues[] during compile. 601 */ 602 int param_index[MAX_UNIFORMS * 4]; 603 int param_offset[MAX_UNIFORMS * 4]; 604 605 int *virtual_grf_sizes; 606 int virtual_grf_next; 607 int virtual_grf_array_size; 608 int *virtual_grf_def; 609 int *virtual_grf_use; 610 bool live_intervals_valid; 611 612 /* This is the map from UNIFORM hw_reg + reg_offset as generated by 613 * the visitor to the packed uniform number after 614 * remove_dead_constants() that represents the actual uploaded 615 * uniform index. 616 */ 617 int *params_remap; 618 619 struct hash_table *variable_ht; 620 ir_variable *frag_depth; 621 fs_reg outputs[BRW_MAX_DRAW_BUFFERS]; 622 unsigned output_components[BRW_MAX_DRAW_BUFFERS]; 623 fs_reg dual_src_output; 624 int first_non_payload_grf; 625 int max_grf; 626 int urb_setup[FRAG_ATTRIB_MAX]; 627 bool kill_emitted; 628 629 /** @{ debug annotation info */ 630 const char *current_annotation; 631 ir_instruction *base_ir; 632 /** @} */ 633 634 bool failed; 635 char *fail_msg; 636 637 /* Result of last visit() method. 
*/ 638 fs_reg result; 639 640 fs_reg pixel_x; 641 fs_reg pixel_y; 642 fs_reg wpos_w; 643 fs_reg pixel_w; 644 fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; 645 fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; 646 fs_reg reg_null_cmp; 647 648 int grf_used; 649 650 int force_uncompressed_stack; 651 int force_sechalf_stack; 652 653 class fs_bblock *bblock; 654}; 655 656bool brw_do_channel_expressions(struct exec_list *instructions); 657bool brw_do_vector_splitting(struct exec_list *instructions); 658bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); 659