brw_fs.h revision d1056541e239dfcee0ad6af2fd2d9fab37dbf025
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28#pragma once 29 30#include "brw_shader.h" 31 32extern "C" { 33 34#include <sys/types.h> 35 36#include "main/macros.h" 37#include "main/shaderobj.h" 38#include "main/uniforms.h" 39#include "program/prog_parameter.h" 40#include "program/prog_print.h" 41#include "program/prog_optimize.h" 42#include "program/register_allocate.h" 43#include "program/sampler.h" 44#include "program/hash_table.h" 45#include "brw_context.h" 46#include "brw_eu.h" 47#include "brw_wm.h" 48} 49#include "glsl/glsl_types.h" 50#include "glsl/ir.h" 51 52class fs_bblock; 53 54enum register_file { 55 BAD_FILE, 56 ARF, 57 GRF, 58 MRF, 59 IMM, 60 FIXED_HW_REG, /* a struct brw_reg */ 61 UNIFORM, /* prog_data->params[reg] */ 62}; 63 64class fs_reg { 65public: 66 /* Callers of this ralloc-based new need not call delete. It's 67 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 68 static void* operator new(size_t size, void *ctx) 69 { 70 void *node; 71 72 node = ralloc_size(ctx, size); 73 assert(node != NULL); 74 75 return node; 76 } 77 78 void init() 79 { 80 memset(this, 0, sizeof(*this)); 81 this->smear = -1; 82 } 83 84 /** Generic unset register constructor. */ 85 fs_reg() 86 { 87 init(); 88 this->file = BAD_FILE; 89 } 90 91 /** Immediate value constructor. */ 92 fs_reg(float f) 93 { 94 init(); 95 this->file = IMM; 96 this->type = BRW_REGISTER_TYPE_F; 97 this->imm.f = f; 98 } 99 100 /** Immediate value constructor. */ 101 fs_reg(int32_t i) 102 { 103 init(); 104 this->file = IMM; 105 this->type = BRW_REGISTER_TYPE_D; 106 this->imm.i = i; 107 } 108 109 /** Immediate value constructor. */ 110 fs_reg(uint32_t u) 111 { 112 init(); 113 this->file = IMM; 114 this->type = BRW_REGISTER_TYPE_UD; 115 this->imm.u = u; 116 } 117 118 /** Fixed brw_reg Immediate value constructor. */ 119 fs_reg(struct brw_reg fixed_hw_reg) 120 { 121 init(); 122 this->file = FIXED_HW_REG; 123 this->fixed_hw_reg = fixed_hw_reg; 124 this->type = fixed_hw_reg.type; 125 } 126 127 fs_reg(enum register_file file, int reg); 128 fs_reg(enum register_file file, int reg, uint32_t type); 129 fs_reg(class fs_visitor *v, const struct glsl_type *type); 130 131 bool equals(const fs_reg &r) const 132 { 133 return (file == r.file && 134 reg == r.reg && 135 reg_offset == r.reg_offset && 136 type == r.type && 137 negate == r.negate && 138 abs == r.abs && 139 memcmp(&fixed_hw_reg, &r.fixed_hw_reg, 140 sizeof(fixed_hw_reg)) == 0 && 141 smear == r.smear && 142 imm.u == r.imm.u); 143 } 144 145 /** Register file: ARF, GRF, MRF, IMM. */ 146 enum register_file file; 147 /** 148 * Register number. For ARF/MRF, it's the hardware register. For 149 * GRF, it's a virtual register number until register allocation 150 */ 151 int reg; 152 /** 153 * For virtual registers, this is a hardware register offset from 154 * the start of the register block (for example, a constant index 155 * in an array access). 156 */ 157 int reg_offset; 158 /** Register type. BRW_REGISTER_TYPE_* */ 159 int type; 160 bool negate; 161 bool abs; 162 bool sechalf; 163 struct brw_reg fixed_hw_reg; 164 int smear; /* -1, or a channel of the reg to smear to all channels. */ 165 166 /** Value for file == IMM */ 167 union { 168 int32_t i; 169 uint32_t u; 170 float f; 171 } imm; 172}; 173 174static const fs_reg reg_undef; 175static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); 176static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); 177 178class fs_inst : public exec_node { 179public: 180 /* Callers of this ralloc-based new need not call delete. It's 181 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 182 static void* operator new(size_t size, void *ctx) 183 { 184 void *node; 185 186 node = rzalloc_size(ctx, size); 187 assert(node != NULL); 188 189 return node; 190 } 191 192 void init() 193 { 194 memset(this, 0, sizeof(*this)); 195 this->opcode = BRW_OPCODE_NOP; 196 this->conditional_mod = BRW_CONDITIONAL_NONE; 197 198 this->dst = reg_undef; 199 this->src[0] = reg_undef; 200 this->src[1] = reg_undef; 201 this->src[2] = reg_undef; 202 } 203 204 fs_inst() 205 { 206 init(); 207 } 208 209 fs_inst(enum opcode opcode) 210 { 211 init(); 212 this->opcode = opcode; 213 } 214 215 fs_inst(enum opcode opcode, fs_reg dst) 216 { 217 init(); 218 this->opcode = opcode; 219 this->dst = dst; 220 221 if (dst.file == GRF) 222 assert(dst.reg_offset >= 0); 223 } 224 225 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) 226 { 227 init(); 228 this->opcode = opcode; 229 this->dst = dst; 230 this->src[0] = src0; 231 232 if (dst.file == GRF) 233 assert(dst.reg_offset >= 0); 234 if (src[0].file == GRF) 235 assert(src[0].reg_offset >= 0); 236 } 237 238 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 239 { 240 init(); 241 this->opcode = opcode; 242 this->dst = dst; 243 this->src[0] = src0; 244 this->src[1] = src1; 245 246 if (dst.file == GRF) 247 assert(dst.reg_offset >= 0); 248 if (src[0].file == GRF) 249 assert(src[0].reg_offset >= 0); 250 if (src[1].file == GRF) 251 assert(src[1].reg_offset >= 0); 252 } 253 254 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 255 { 256 init(); 257 this->opcode = opcode; 258 this->dst = dst; 259 this->src[0] = src0; 260 this->src[1] = src1; 261 this->src[2] = src2; 262 263 if (dst.file == GRF) 264 assert(dst.reg_offset >= 0); 265 if (src[0].file == GRF) 266 assert(src[0].reg_offset >= 0); 267 if (src[1].file == GRF) 268 assert(src[1].reg_offset >= 0); 269 if (src[2].file == GRF) 270 assert(src[2].reg_offset >= 0); 271 } 272 273 bool equals(fs_inst *inst) 274 { 275 return (opcode == inst->opcode && 276 dst.equals(inst->dst) && 277 src[0].equals(inst->src[0]) && 278 src[1].equals(inst->src[1]) && 279 src[2].equals(inst->src[2]) && 280 saturate == inst->saturate && 281 predicated == inst->predicated && 282 conditional_mod == inst->conditional_mod && 283 mlen == inst->mlen && 284 base_mrf == inst->base_mrf && 285 sampler == inst->sampler && 286 target == inst->target && 287 eot == inst->eot && 288 header_present == inst->header_present && 289 shadow_compare == inst->shadow_compare && 290 offset == inst->offset); 291 } 292 293 int regs_written() 294 { 295 if (is_tex()) 296 return 4; 297 298 /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2, 299 * but we don't currently use them...nor do we have an opcode for them. 300 */ 301 302 return 1; 303 } 304 305 bool is_tex() 306 { 307 return (opcode == SHADER_OPCODE_TEX || 308 opcode == FS_OPCODE_TXB || 309 opcode == SHADER_OPCODE_TXD || 310 opcode == SHADER_OPCODE_TXF || 311 opcode == SHADER_OPCODE_TXL || 312 opcode == SHADER_OPCODE_TXS); 313 } 314 315 bool is_math() 316 { 317 return (opcode == SHADER_OPCODE_RCP || 318 opcode == SHADER_OPCODE_RSQ || 319 opcode == SHADER_OPCODE_SQRT || 320 opcode == SHADER_OPCODE_EXP2 || 321 opcode == SHADER_OPCODE_LOG2 || 322 opcode == SHADER_OPCODE_SIN || 323 opcode == SHADER_OPCODE_COS || 324 opcode == SHADER_OPCODE_INT_QUOTIENT || 325 opcode == SHADER_OPCODE_INT_REMAINDER || 326 opcode == SHADER_OPCODE_POW); 327 } 328 329 enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 330 fs_reg dst; 331 fs_reg src[3]; 332 bool saturate; 333 bool predicated; 334 bool predicate_inverse; 335 int conditional_mod; /**< BRW_CONDITIONAL_* */ 336 337 int mlen; /**< SEND message length */ 338 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ 339 int sampler; 340 int target; /**< MRT target. */ 341 bool eot; 342 bool header_present; 343 bool shadow_compare; 344 bool force_uncompressed; 345 bool force_sechalf; 346 uint32_t offset; /* spill/unspill offset */ 347 348 /** @{ 349 * Annotation for the generated IR. One of the two can be set. 350 */ 351 ir_instruction *ir; 352 const char *annotation; 353 /** @} */ 354}; 355 356class fs_visitor : public ir_visitor 357{ 358public: 359 360 fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, 361 struct brw_shader *shader) 362 { 363 this->c = c; 364 this->p = &c->func; 365 this->brw = p->brw; 366 this->fp = (struct gl_fragment_program *) 367 prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; 368 this->prog = prog; 369 this->intel = &brw->intel; 370 this->ctx = &intel->ctx; 371 this->mem_ctx = ralloc_context(NULL); 372 this->shader = shader; 373 this->failed = false; 374 this->variable_ht = hash_table_ctor(0, 375 hash_table_pointer_hash, 376 hash_table_pointer_compare); 377 378 /* There's a question that appears to be left open in the spec: 379 * How do implicit dst conversions interact with the CMP 380 * instruction or conditional mods? On gen6, the instruction: 381 * 382 * CMP null<d> src0<f> src1<f> 383 * 384 * will do src1 - src0 and compare that result as if it was an 385 * integer. On gen4, it will do src1 - src0 as float, convert 386 * the result to int, and compare as int. In between, it 387 * appears that it does src1 - src0 and does the compare in the 388 * execution type so dst type doesn't matter. 389 */ 390 if (this->intel->gen > 4) 391 this->reg_null_cmp = reg_null_d; 392 else 393 this->reg_null_cmp = reg_null_f; 394 395 this->frag_depth = NULL; 396 memset(this->outputs, 0, sizeof(this->outputs)); 397 this->first_non_payload_grf = 0; 398 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; 399 400 this->current_annotation = NULL; 401 this->base_ir = NULL; 402 403 this->virtual_grf_sizes = NULL; 404 this->virtual_grf_next = 0; 405 this->virtual_grf_array_size = 0; 406 this->virtual_grf_def = NULL; 407 this->virtual_grf_use = NULL; 408 this->live_intervals_valid = false; 409 410 this->kill_emitted = false; 411 this->force_uncompressed_stack = 0; 412 this->force_sechalf_stack = 0; 413 } 414 415 ~fs_visitor() 416 { 417 ralloc_free(this->mem_ctx); 418 hash_table_dtor(this->variable_ht); 419 } 420 421 fs_reg *variable_storage(ir_variable *var); 422 int virtual_grf_alloc(int size); 423 void import_uniforms(fs_visitor *v); 424 425 void visit(ir_variable *ir); 426 void visit(ir_assignment *ir); 427 void visit(ir_dereference_variable *ir); 428 void visit(ir_dereference_record *ir); 429 void visit(ir_dereference_array *ir); 430 void visit(ir_expression *ir); 431 void visit(ir_texture *ir); 432 void visit(ir_if *ir); 433 void visit(ir_constant *ir); 434 void visit(ir_swizzle *ir); 435 void visit(ir_return *ir); 436 void visit(ir_loop *ir); 437 void visit(ir_loop_jump *ir); 438 void visit(ir_discard *ir); 439 void visit(ir_call *ir); 440 void visit(ir_function *ir); 441 void visit(ir_function_signature *ir); 442 443 void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler); 444 445 fs_inst *emit(fs_inst inst); 446 447 fs_inst *emit(enum opcode opcode) 448 { 449 return emit(fs_inst(opcode)); 450 } 451 452 fs_inst *emit(enum opcode opcode, fs_reg dst) 453 { 454 return emit(fs_inst(opcode, dst)); 455 } 456 457 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0) 458 { 459 return emit(fs_inst(opcode, dst, src0)); 460 } 461 462 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 463 { 464 return emit(fs_inst(opcode, dst, src0, src1)); 465 } 466 467 fs_inst *emit(enum opcode opcode, fs_reg dst, 468 fs_reg src0, fs_reg src1, fs_reg src2) 469 { 470 return emit(fs_inst(opcode, dst, src0, src1, src2)); 471 } 472 473 int type_size(const struct glsl_type *type); 474 fs_inst *get_instruction_generating_reg(fs_inst *start, 475 fs_inst *end, 476 fs_reg reg); 477 478 bool run(); 479 void setup_paramvalues_refs(); 480 void assign_curb_setup(); 481 void calculate_urb_setup(); 482 void assign_urb_setup(); 483 bool assign_regs(); 484 void assign_regs_trivial(); 485 int choose_spill_reg(struct ra_graph *g); 486 void spill_reg(int spill_reg); 487 void split_virtual_grfs(); 488 void setup_pull_constants(); 489 void calculate_live_intervals(); 490 bool propagate_constants(); 491 bool opt_algebraic(); 492 bool opt_cse(); 493 bool opt_cse_local(fs_bblock *block, exec_list *aeb); 494 bool opt_copy_propagate(); 495 bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block, 496 exec_list *acp); 497 bool register_coalesce(); 498 bool register_coalesce_2(); 499 bool compute_to_mrf(); 500 bool dead_code_eliminate(); 501 bool remove_dead_constants(); 502 bool remove_duplicate_mrf_writes(); 503 bool virtual_grf_interferes(int a, int b); 504 void schedule_instructions(); 505 void fail(const char *msg, ...); 506 507 void push_force_uncompressed(); 508 void pop_force_uncompressed(); 509 void push_force_sechalf(); 510 void pop_force_sechalf(); 511 512 void generate_code(); 513 void generate_fb_write(fs_inst *inst); 514 void generate_pixel_xy(struct brw_reg dst, bool is_x); 515 void generate_linterp(fs_inst *inst, struct brw_reg dst, 516 struct brw_reg *src); 517 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 518 void generate_math1_gen7(fs_inst *inst, 519 struct brw_reg dst, 520 struct brw_reg src); 521 void generate_math2_gen7(fs_inst *inst, 522 struct brw_reg dst, 523 struct brw_reg src0, 524 struct brw_reg src1); 525 void generate_math1_gen6(fs_inst *inst, 526 struct brw_reg dst, 527 struct brw_reg src); 528 void generate_math2_gen6(fs_inst *inst, 529 struct brw_reg dst, 530 struct brw_reg src0, 531 struct brw_reg src1); 532 void generate_math_gen4(fs_inst *inst, 533 struct brw_reg dst, 534 struct brw_reg src); 535 void generate_discard(fs_inst *inst); 536 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 537 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src, 538 bool negate_value); 539 void generate_spill(fs_inst *inst, struct brw_reg src); 540 void generate_unspill(fs_inst *inst, struct brw_reg dst); 541 void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst); 542 543 void emit_dummy_fs(); 544 fs_reg *emit_fragcoord_interpolation(ir_variable *ir); 545 fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp, 546 glsl_interp_qualifier interpolation_mode, 547 bool is_centroid); 548 fs_reg *emit_frontfacing_interpolation(ir_variable *ir); 549 fs_reg *emit_general_interpolation(ir_variable *ir); 550 void emit_interpolation_setup_gen4(); 551 void emit_interpolation_setup_gen6(); 552 fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 553 int sampler); 554 fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 555 int sampler); 556 fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 557 int sampler); 558 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); 559 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); 560 bool try_emit_saturate(ir_expression *ir); 561 bool try_emit_mad(ir_expression *ir, int mul_arg); 562 void emit_bool_to_cond_code(ir_rvalue *condition); 563 void emit_if_gen6(ir_if *ir); 564 void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); 565 566 void emit_color_write(int target, int index, int first_color_mrf); 567 void emit_fb_writes(); 568 bool try_rewrite_rhs_to_dst(ir_assignment *ir, 569 fs_reg dst, 570 fs_reg src, 571 fs_inst *pre_rhs_inst, 572 fs_inst *last_rhs_inst); 573 void emit_assignment_writes(fs_reg &l, fs_reg &r, 574 const glsl_type *type, bool predicated); 575 void resolve_ud_negate(fs_reg *reg); 576 void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg); 577 578 struct brw_reg interp_reg(int location, int channel); 579 int setup_uniform_values(int loc, const glsl_type *type); 580 void setup_builtin_uniform_values(ir_variable *ir); 581 int implied_mrf_writes(fs_inst *inst); 582 583 struct brw_context *brw; 584 const struct gl_fragment_program *fp; 585 struct intel_context *intel; 586 struct gl_context *ctx; 587 struct brw_wm_compile *c; 588 struct brw_compile *p; 589 struct brw_shader *shader; 590 struct gl_shader_program *prog; 591 void *mem_ctx; 592 exec_list instructions; 593 594 /* Delayed setup of c->prog_data.params[] due to realloc of 595 * ParamValues[] during compile. 596 */ 597 int param_index[MAX_UNIFORMS * 4]; 598 int param_offset[MAX_UNIFORMS * 4]; 599 600 int *virtual_grf_sizes; 601 int virtual_grf_next; 602 int virtual_grf_array_size; 603 int *virtual_grf_def; 604 int *virtual_grf_use; 605 bool live_intervals_valid; 606 607 /* This is the map from UNIFORM hw_reg + reg_offset as generated by 608 * the visitor to the packed uniform number after 609 * remove_dead_constants() that represents the actual uploaded 610 * uniform index. 611 */ 612 int *params_remap; 613 614 struct hash_table *variable_ht; 615 ir_variable *frag_depth; 616 fs_reg outputs[BRW_MAX_DRAW_BUFFERS]; 617 unsigned output_components[BRW_MAX_DRAW_BUFFERS]; 618 fs_reg dual_src_output; 619 int first_non_payload_grf; 620 int max_grf; 621 int urb_setup[FRAG_ATTRIB_MAX]; 622 bool kill_emitted; 623 624 /** @{ debug annotation info */ 625 const char *current_annotation; 626 ir_instruction *base_ir; 627 /** @} */ 628 629 bool failed; 630 char *fail_msg; 631 632 /* Result of last visit() method. */ 633 fs_reg result; 634 635 fs_reg pixel_x; 636 fs_reg pixel_y; 637 fs_reg wpos_w; 638 fs_reg pixel_w; 639 fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; 640 fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; 641 fs_reg reg_null_cmp; 642 643 int grf_used; 644 645 int force_uncompressed_stack; 646 int force_sechalf_stack; 647 648 class fs_bblock *bblock; 649}; 650 651bool brw_do_channel_expressions(struct exec_list *instructions); 652bool brw_do_vector_splitting(struct exec_list *instructions); 653bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); 654