brw_fs.h revision 080b125c64b48447a515b1a169f779e62b3de13d
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28#pragma once 29 30#include "brw_shader.h" 31 32extern "C" { 33 34#include <sys/types.h> 35 36#include "main/macros.h" 37#include "main/shaderobj.h" 38#include "main/uniforms.h" 39#include "program/prog_parameter.h" 40#include "program/prog_print.h" 41#include "program/prog_optimize.h" 42#include "program/register_allocate.h" 43#include "program/sampler.h" 44#include "program/hash_table.h" 45#include "brw_context.h" 46#include "brw_eu.h" 47#include "brw_wm.h" 48} 49#include "glsl/glsl_types.h" 50#include "glsl/ir.h" 51 52enum register_file { 53 BAD_FILE, 54 ARF, 55 GRF, 56 MRF, 57 IMM, 58 FIXED_HW_REG, /* a struct brw_reg */ 59 UNIFORM, /* prog_data->params[reg] */ 60}; 61 62class fs_reg { 63public: 64 /* Callers of this ralloc-based new need not call delete. It's 65 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 66 static void* operator new(size_t size, void *ctx) 67 { 68 void *node; 69 70 node = ralloc_size(ctx, size); 71 assert(node != NULL); 72 73 return node; 74 } 75 76 void init() 77 { 78 memset(this, 0, sizeof(*this)); 79 this->smear = -1; 80 } 81 82 /** Generic unset register constructor. */ 83 fs_reg() 84 { 85 init(); 86 this->file = BAD_FILE; 87 } 88 89 /** Immediate value constructor. */ 90 fs_reg(float f) 91 { 92 init(); 93 this->file = IMM; 94 this->type = BRW_REGISTER_TYPE_F; 95 this->imm.f = f; 96 } 97 98 /** Immediate value constructor. */ 99 fs_reg(int32_t i) 100 { 101 init(); 102 this->file = IMM; 103 this->type = BRW_REGISTER_TYPE_D; 104 this->imm.i = i; 105 } 106 107 /** Immediate value constructor. */ 108 fs_reg(uint32_t u) 109 { 110 init(); 111 this->file = IMM; 112 this->type = BRW_REGISTER_TYPE_UD; 113 this->imm.u = u; 114 } 115 116 /** Fixed brw_reg Immediate value constructor. */ 117 fs_reg(struct brw_reg fixed_hw_reg) 118 { 119 init(); 120 this->file = FIXED_HW_REG; 121 this->fixed_hw_reg = fixed_hw_reg; 122 this->type = fixed_hw_reg.type; 123 } 124 125 fs_reg(enum register_file file, int reg); 126 fs_reg(enum register_file file, int reg, uint32_t type); 127 fs_reg(class fs_visitor *v, const struct glsl_type *type); 128 129 bool equals(fs_reg *r) 130 { 131 return (file == r->file && 132 reg == r->reg && 133 reg_offset == r->reg_offset && 134 type == r->type && 135 negate == r->negate && 136 abs == r->abs && 137 memcmp(&fixed_hw_reg, &r->fixed_hw_reg, 138 sizeof(fixed_hw_reg)) == 0 && 139 smear == r->smear && 140 imm.u == r->imm.u); 141 } 142 143 /** Register file: ARF, GRF, MRF, IMM. */ 144 enum register_file file; 145 /** 146 * Register number. For ARF/MRF, it's the hardware register. For 147 * GRF, it's a virtual register number until register allocation 148 */ 149 int reg; 150 /** 151 * For virtual registers, this is a hardware register offset from 152 * the start of the register block (for example, a constant index 153 * in an array access). 154 */ 155 int reg_offset; 156 /** Register type. BRW_REGISTER_TYPE_* */ 157 int type; 158 bool negate; 159 bool abs; 160 bool sechalf; 161 struct brw_reg fixed_hw_reg; 162 int smear; /* -1, or a channel of the reg to smear to all channels. */ 163 164 /** Value for file == IMM */ 165 union { 166 int32_t i; 167 uint32_t u; 168 float f; 169 } imm; 170}; 171 172static const fs_reg reg_undef; 173static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); 174static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); 175 176class ip_record : public exec_node { 177public: 178 static void* operator new(size_t size, void *ctx) 179 { 180 void *node; 181 182 node = rzalloc_size(ctx, size); 183 assert(node != NULL); 184 185 return node; 186 } 187 188 ip_record(int ip) 189 { 190 this->ip = ip; 191 } 192 193 int ip; 194}; 195 196class fs_inst : public exec_node { 197public: 198 /* Callers of this ralloc-based new need not call delete. It's 199 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 200 static void* operator new(size_t size, void *ctx) 201 { 202 void *node; 203 204 node = rzalloc_size(ctx, size); 205 assert(node != NULL); 206 207 return node; 208 } 209 210 void init() 211 { 212 memset(this, 0, sizeof(*this)); 213 this->opcode = BRW_OPCODE_NOP; 214 this->conditional_mod = BRW_CONDITIONAL_NONE; 215 216 this->dst = reg_undef; 217 this->src[0] = reg_undef; 218 this->src[1] = reg_undef; 219 this->src[2] = reg_undef; 220 } 221 222 fs_inst() 223 { 224 init(); 225 } 226 227 fs_inst(enum opcode opcode) 228 { 229 init(); 230 this->opcode = opcode; 231 } 232 233 fs_inst(enum opcode opcode, fs_reg dst) 234 { 235 init(); 236 this->opcode = opcode; 237 this->dst = dst; 238 239 if (dst.file == GRF) 240 assert(dst.reg_offset >= 0); 241 } 242 243 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) 244 { 245 init(); 246 this->opcode = opcode; 247 this->dst = dst; 248 this->src[0] = src0; 249 250 if (dst.file == GRF) 251 assert(dst.reg_offset >= 0); 252 if (src[0].file == GRF) 253 assert(src[0].reg_offset >= 0); 254 } 255 256 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 257 { 258 init(); 259 this->opcode = opcode; 260 this->dst = dst; 261 this->src[0] = src0; 262 this->src[1] = src1; 263 264 if (dst.file == GRF) 265 assert(dst.reg_offset >= 0); 266 if (src[0].file == GRF) 267 assert(src[0].reg_offset >= 0); 268 if (src[1].file == GRF) 269 assert(src[1].reg_offset >= 0); 270 } 271 272 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 273 { 274 init(); 275 this->opcode = opcode; 276 this->dst = dst; 277 this->src[0] = src0; 278 this->src[1] = src1; 279 this->src[2] = src2; 280 281 if (dst.file == GRF) 282 assert(dst.reg_offset >= 0); 283 if (src[0].file == GRF) 284 assert(src[0].reg_offset >= 0); 285 if (src[1].file == GRF) 286 assert(src[1].reg_offset >= 0); 287 if (src[2].file == GRF) 288 assert(src[2].reg_offset >= 0); 289 } 290 291 bool equals(fs_inst *inst) 292 { 293 return (opcode == inst->opcode && 294 dst.equals(&inst->dst) && 295 src[0].equals(&inst->src[0]) && 296 src[1].equals(&inst->src[1]) && 297 src[2].equals(&inst->src[2]) && 298 saturate == inst->saturate && 299 predicated == inst->predicated && 300 conditional_mod == inst->conditional_mod && 301 mlen == inst->mlen && 302 base_mrf == inst->base_mrf && 303 sampler == inst->sampler && 304 target == inst->target && 305 eot == inst->eot && 306 header_present == inst->header_present && 307 shadow_compare == inst->shadow_compare && 308 offset == inst->offset); 309 } 310 311 int regs_written() 312 { 313 if (is_tex()) 314 return 4; 315 316 /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2, 317 * but we don't currently use them...nor do we have an opcode for them. 318 */ 319 320 return 1; 321 } 322 323 bool is_tex() 324 { 325 return (opcode == SHADER_OPCODE_TEX || 326 opcode == FS_OPCODE_TXB || 327 opcode == SHADER_OPCODE_TXD || 328 opcode == SHADER_OPCODE_TXF || 329 opcode == SHADER_OPCODE_TXL || 330 opcode == SHADER_OPCODE_TXS); 331 } 332 333 bool is_math() 334 { 335 return (opcode == SHADER_OPCODE_RCP || 336 opcode == SHADER_OPCODE_RSQ || 337 opcode == SHADER_OPCODE_SQRT || 338 opcode == SHADER_OPCODE_EXP2 || 339 opcode == SHADER_OPCODE_LOG2 || 340 opcode == SHADER_OPCODE_SIN || 341 opcode == SHADER_OPCODE_COS || 342 opcode == SHADER_OPCODE_INT_QUOTIENT || 343 opcode == SHADER_OPCODE_INT_REMAINDER || 344 opcode == SHADER_OPCODE_POW); 345 } 346 347 enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 348 fs_reg dst; 349 fs_reg src[3]; 350 bool saturate; 351 bool predicated; 352 bool predicate_inverse; 353 int conditional_mod; /**< BRW_CONDITIONAL_* */ 354 355 int mlen; /**< SEND message length */ 356 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ 357 int sampler; 358 int target; /**< MRT target. */ 359 bool eot; 360 bool header_present; 361 bool shadow_compare; 362 bool force_uncompressed; 363 bool force_sechalf; 364 uint32_t offset; /* spill/unspill offset */ 365 366 /** @{ 367 * Annotation for the generated IR. One of the two can be set. 368 */ 369 ir_instruction *ir; 370 const char *annotation; 371 /** @} */ 372}; 373 374class fs_visitor : public ir_visitor 375{ 376public: 377 378 fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, 379 struct brw_shader *shader) 380 { 381 this->c = c; 382 this->p = &c->func; 383 this->brw = p->brw; 384 this->fp = (struct gl_fragment_program *) 385 prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; 386 this->prog = prog; 387 this->intel = &brw->intel; 388 this->ctx = &intel->ctx; 389 this->mem_ctx = ralloc_context(NULL); 390 this->shader = shader; 391 this->failed = false; 392 this->variable_ht = hash_table_ctor(0, 393 hash_table_pointer_hash, 394 hash_table_pointer_compare); 395 396 /* There's a question that appears to be left open in the spec: 397 * How do implicit dst conversions interact with the CMP 398 * instruction or conditional mods? On gen6, the instruction: 399 * 400 * CMP null<d> src0<f> src1<f> 401 * 402 * will do src1 - src0 and compare that result as if it was an 403 * integer. On gen4, it will do src1 - src0 as float, convert 404 * the result to int, and compare as int. In between, it 405 * appears that it does src1 - src0 and does the compare in the 406 * execution type so dst type doesn't matter. 407 */ 408 if (this->intel->gen > 4) 409 this->reg_null_cmp = reg_null_d; 410 else 411 this->reg_null_cmp = reg_null_f; 412 413 this->frag_depth = NULL; 414 memset(this->outputs, 0, sizeof(this->outputs)); 415 this->first_non_payload_grf = 0; 416 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; 417 418 this->current_annotation = NULL; 419 this->base_ir = NULL; 420 421 this->virtual_grf_sizes = NULL; 422 this->virtual_grf_next = 0; 423 this->virtual_grf_array_size = 0; 424 this->virtual_grf_def = NULL; 425 this->virtual_grf_use = NULL; 426 this->live_intervals_valid = false; 427 428 this->kill_emitted = false; 429 this->force_uncompressed_stack = 0; 430 this->force_sechalf_stack = 0; 431 } 432 433 ~fs_visitor() 434 { 435 ralloc_free(this->mem_ctx); 436 hash_table_dtor(this->variable_ht); 437 } 438 439 fs_reg *variable_storage(ir_variable *var); 440 int virtual_grf_alloc(int size); 441 void import_uniforms(fs_visitor *v); 442 443 void visit(ir_variable *ir); 444 void visit(ir_assignment *ir); 445 void visit(ir_dereference_variable *ir); 446 void visit(ir_dereference_record *ir); 447 void visit(ir_dereference_array *ir); 448 void visit(ir_expression *ir); 449 void visit(ir_texture *ir); 450 void visit(ir_if *ir); 451 void visit(ir_constant *ir); 452 void visit(ir_swizzle *ir); 453 void visit(ir_return *ir); 454 void visit(ir_loop *ir); 455 void visit(ir_loop_jump *ir); 456 void visit(ir_discard *ir); 457 void visit(ir_call *ir); 458 void visit(ir_function *ir); 459 void visit(ir_function_signature *ir); 460 461 void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler); 462 463 fs_inst *emit(fs_inst inst); 464 465 fs_inst *emit(enum opcode opcode) 466 { 467 return emit(fs_inst(opcode)); 468 } 469 470 fs_inst *emit(enum opcode opcode, fs_reg dst) 471 { 472 return emit(fs_inst(opcode, dst)); 473 } 474 475 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0) 476 { 477 return emit(fs_inst(opcode, dst, src0)); 478 } 479 480 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 481 { 482 return emit(fs_inst(opcode, dst, src0, src1)); 483 } 484 485 fs_inst *emit(enum opcode opcode, fs_reg dst, 486 fs_reg src0, fs_reg src1, fs_reg src2) 487 { 488 return emit(fs_inst(opcode, dst, src0, src1, src2)); 489 } 490 491 int type_size(const struct glsl_type *type); 492 fs_inst *get_instruction_generating_reg(fs_inst *start, 493 fs_inst *end, 494 fs_reg reg); 495 496 bool run(); 497 void setup_paramvalues_refs(); 498 void assign_curb_setup(); 499 void calculate_urb_setup(); 500 void assign_urb_setup(); 501 bool assign_regs(); 502 void assign_regs_trivial(); 503 int choose_spill_reg(struct ra_graph *g); 504 void spill_reg(int spill_reg); 505 void split_virtual_grfs(); 506 void setup_pull_constants(); 507 void calculate_live_intervals(); 508 bool propagate_constants(); 509 bool opt_algebraic(); 510 bool register_coalesce(); 511 bool compute_to_mrf(); 512 bool dead_code_eliminate(); 513 bool remove_dead_constants(); 514 bool remove_duplicate_mrf_writes(); 515 bool virtual_grf_interferes(int a, int b); 516 void schedule_instructions(); 517 void patch_discard_jumps_to_fb_writes(); 518 void fail(const char *msg, ...); 519 520 void push_force_uncompressed(); 521 void pop_force_uncompressed(); 522 void push_force_sechalf(); 523 void pop_force_sechalf(); 524 525 void generate_code(); 526 void generate_fb_write(fs_inst *inst); 527 void generate_pixel_xy(struct brw_reg dst, bool is_x); 528 void generate_linterp(fs_inst *inst, struct brw_reg dst, 529 struct brw_reg *src); 530 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 531 void generate_math1_gen7(fs_inst *inst, 532 struct brw_reg dst, 533 struct brw_reg src); 534 void generate_math2_gen7(fs_inst *inst, 535 struct brw_reg dst, 536 struct brw_reg src0, 537 struct brw_reg src1); 538 void generate_math1_gen6(fs_inst *inst, 539 struct brw_reg dst, 540 struct brw_reg src); 541 void generate_math2_gen6(fs_inst *inst, 542 struct brw_reg dst, 543 struct brw_reg src0, 544 struct brw_reg src1); 545 void generate_math_gen4(fs_inst *inst, 546 struct brw_reg dst, 547 struct brw_reg src); 548 void generate_discard(fs_inst *inst); 549 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 550 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 551 void generate_spill(fs_inst *inst, struct brw_reg src); 552 void generate_unspill(fs_inst *inst, struct brw_reg dst); 553 void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst); 554 555 void emit_dummy_fs(); 556 fs_reg *emit_fragcoord_interpolation(ir_variable *ir); 557 fs_reg *emit_frontfacing_interpolation(ir_variable *ir); 558 fs_reg *emit_general_interpolation(ir_variable *ir); 559 void emit_interpolation_setup_gen4(); 560 void emit_interpolation_setup_gen6(); 561 fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 562 int sampler); 563 fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 564 int sampler); 565 fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 566 int sampler); 567 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); 568 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); 569 bool try_emit_saturate(ir_expression *ir); 570 bool try_emit_mad(ir_expression *ir, int mul_arg); 571 void emit_bool_to_cond_code(ir_rvalue *condition); 572 void emit_if_gen6(ir_if *ir); 573 void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); 574 575 void emit_color_write(int target, int index, int first_color_mrf); 576 void emit_fb_writes(); 577 bool try_rewrite_rhs_to_dst(ir_assignment *ir, 578 fs_reg dst, 579 fs_reg src, 580 fs_inst *pre_rhs_inst, 581 fs_inst *last_rhs_inst); 582 void emit_assignment_writes(fs_reg &l, fs_reg &r, 583 const glsl_type *type, bool predicated); 584 void resolve_ud_negate(fs_reg *reg); 585 586 struct brw_reg interp_reg(int location, int channel); 587 int setup_uniform_values(int loc, const glsl_type *type); 588 void setup_builtin_uniform_values(ir_variable *ir); 589 int implied_mrf_writes(fs_inst *inst); 590 591 struct brw_context *brw; 592 const struct gl_fragment_program *fp; 593 struct intel_context *intel; 594 struct gl_context *ctx; 595 struct brw_wm_compile *c; 596 struct brw_compile *p; 597 struct brw_shader *shader; 598 struct gl_shader_program *prog; 599 void *mem_ctx; 600 exec_list instructions; 601 exec_list discard_halt_patches; 602 603 /* Delayed setup of c->prog_data.params[] due to realloc of 604 * ParamValues[] during compile. 605 */ 606 int param_index[MAX_UNIFORMS * 4]; 607 int param_offset[MAX_UNIFORMS * 4]; 608 609 int *virtual_grf_sizes; 610 int virtual_grf_next; 611 int virtual_grf_array_size; 612 int *virtual_grf_def; 613 int *virtual_grf_use; 614 bool live_intervals_valid; 615 616 /* This is the map from UNIFORM hw_reg + reg_offset as generated by 617 * the visitor to the packed uniform number after 618 * remove_dead_constants() that represents the actual uploaded 619 * uniform index. 620 */ 621 int *params_remap; 622 623 struct hash_table *variable_ht; 624 ir_variable *frag_depth; 625 fs_reg outputs[BRW_MAX_DRAW_BUFFERS]; 626 int first_non_payload_grf; 627 int max_grf; 628 int urb_setup[FRAG_ATTRIB_MAX]; 629 bool kill_emitted; 630 631 /** @{ debug annotation info */ 632 const char *current_annotation; 633 ir_instruction *base_ir; 634 /** @} */ 635 636 bool failed; 637 char *fail_msg; 638 639 /* Result of last visit() method. */ 640 fs_reg result; 641 642 fs_reg pixel_x; 643 fs_reg pixel_y; 644 fs_reg wpos_w; 645 fs_reg pixel_w; 646 fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; 647 fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; 648 fs_reg reg_null_cmp; 649 650 int grf_used; 651 652 int force_uncompressed_stack; 653 int force_sechalf_stack; 654 655 class fs_bblock *bblock; 656}; 657 658bool brw_do_channel_expressions(struct exec_list *instructions); 659bool brw_do_vector_splitting(struct exec_list *instructions); 660bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); 661