/* brw_fs.h revision ecf8963754489abfb5097c130a9bcd4cdb76b6bd */
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28#include "brw_shader.h" 29 30extern "C" { 31 32#include <sys/types.h> 33 34#include "main/macros.h" 35#include "main/shaderobj.h" 36#include "main/uniforms.h" 37#include "program/prog_parameter.h" 38#include "program/prog_print.h" 39#include "program/prog_optimize.h" 40#include "program/register_allocate.h" 41#include "program/sampler.h" 42#include "program/hash_table.h" 43#include "brw_context.h" 44#include "brw_eu.h" 45#include "brw_wm.h" 46} 47#include "../glsl/glsl_types.h" 48#include "../glsl/ir.h" 49 50enum register_file { 51 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 52 GRF = BRW_GENERAL_REGISTER_FILE, 53 MRF = BRW_MESSAGE_REGISTER_FILE, 54 IMM = BRW_IMMEDIATE_VALUE, 55 FIXED_HW_REG, /* a struct brw_reg */ 56 UNIFORM, /* prog_data->params[reg] */ 57 BAD_FILE 58}; 59 60class fs_reg { 61public: 62 /* Callers of this ralloc-based new need not call delete. It's 63 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 64 static void* operator new(size_t size, void *ctx) 65 { 66 void *node; 67 68 node = ralloc_size(ctx, size); 69 assert(node != NULL); 70 71 return node; 72 } 73 74 void init() 75 { 76 memset(this, 0, sizeof(*this)); 77 this->smear = -1; 78 } 79 80 /** Generic unset register constructor. */ 81 fs_reg() 82 { 83 init(); 84 this->file = BAD_FILE; 85 } 86 87 /** Immediate value constructor. */ 88 fs_reg(float f) 89 { 90 init(); 91 this->file = IMM; 92 this->type = BRW_REGISTER_TYPE_F; 93 this->imm.f = f; 94 } 95 96 /** Immediate value constructor. */ 97 fs_reg(int32_t i) 98 { 99 init(); 100 this->file = IMM; 101 this->type = BRW_REGISTER_TYPE_D; 102 this->imm.i = i; 103 } 104 105 /** Immediate value constructor. */ 106 fs_reg(uint32_t u) 107 { 108 init(); 109 this->file = IMM; 110 this->type = BRW_REGISTER_TYPE_UD; 111 this->imm.u = u; 112 } 113 114 /** Fixed brw_reg Immediate value constructor. 
*/ 115 fs_reg(struct brw_reg fixed_hw_reg) 116 { 117 init(); 118 this->file = FIXED_HW_REG; 119 this->fixed_hw_reg = fixed_hw_reg; 120 this->type = fixed_hw_reg.type; 121 } 122 123 fs_reg(enum register_file file, int reg); 124 fs_reg(enum register_file file, int reg, uint32_t type); 125 fs_reg(class fs_visitor *v, const struct glsl_type *type); 126 127 bool equals(fs_reg *r) 128 { 129 return (file == r->file && 130 reg == r->reg && 131 reg_offset == r->reg_offset && 132 type == r->type && 133 negate == r->negate && 134 abs == r->abs && 135 memcmp(&fixed_hw_reg, &r->fixed_hw_reg, 136 sizeof(fixed_hw_reg)) == 0 && 137 smear == r->smear && 138 imm.u == r->imm.u); 139 } 140 141 /** Register file: ARF, GRF, MRF, IMM. */ 142 enum register_file file; 143 /** 144 * Register number. For ARF/MRF, it's the hardware register. For 145 * GRF, it's a virtual register number until register allocation 146 */ 147 int reg; 148 /** 149 * For virtual registers, this is a hardware register offset from 150 * the start of the register block (for example, a constant index 151 * in an array access). 152 */ 153 int reg_offset; 154 /** Register type. BRW_REGISTER_TYPE_* */ 155 int type; 156 bool negate; 157 bool abs; 158 bool sechalf; 159 struct brw_reg fixed_hw_reg; 160 int smear; /* -1, or a channel of the reg to smear to all channels. */ 161 162 /** Value for file == BRW_IMMMEDIATE_FILE */ 163 union { 164 int32_t i; 165 uint32_t u; 166 float f; 167 } imm; 168}; 169 170static const fs_reg reg_undef; 171static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); 172static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); 173 174class fs_inst : public exec_node { 175public: 176 /* Callers of this ralloc-based new need not call delete. It's 177 * easier to just ralloc_free 'ctx' (or any of its ancestors). 
*/ 178 static void* operator new(size_t size, void *ctx) 179 { 180 void *node; 181 182 node = rzalloc_size(ctx, size); 183 assert(node != NULL); 184 185 return node; 186 } 187 188 void init() 189 { 190 memset(this, 0, sizeof(*this)); 191 this->opcode = BRW_OPCODE_NOP; 192 this->conditional_mod = BRW_CONDITIONAL_NONE; 193 194 this->dst = reg_undef; 195 this->src[0] = reg_undef; 196 this->src[1] = reg_undef; 197 this->src[2] = reg_undef; 198 } 199 200 fs_inst() 201 { 202 init(); 203 } 204 205 fs_inst(enum opcode opcode) 206 { 207 init(); 208 this->opcode = opcode; 209 } 210 211 fs_inst(enum opcode opcode, fs_reg dst) 212 { 213 init(); 214 this->opcode = opcode; 215 this->dst = dst; 216 217 if (dst.file == GRF) 218 assert(dst.reg_offset >= 0); 219 } 220 221 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) 222 { 223 init(); 224 this->opcode = opcode; 225 this->dst = dst; 226 this->src[0] = src0; 227 228 if (dst.file == GRF) 229 assert(dst.reg_offset >= 0); 230 if (src[0].file == GRF) 231 assert(src[0].reg_offset >= 0); 232 } 233 234 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 235 { 236 init(); 237 this->opcode = opcode; 238 this->dst = dst; 239 this->src[0] = src0; 240 this->src[1] = src1; 241 242 if (dst.file == GRF) 243 assert(dst.reg_offset >= 0); 244 if (src[0].file == GRF) 245 assert(src[0].reg_offset >= 0); 246 if (src[1].file == GRF) 247 assert(src[1].reg_offset >= 0); 248 } 249 250 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 251 { 252 init(); 253 this->opcode = opcode; 254 this->dst = dst; 255 this->src[0] = src0; 256 this->src[1] = src1; 257 this->src[2] = src2; 258 259 if (dst.file == GRF) 260 assert(dst.reg_offset >= 0); 261 if (src[0].file == GRF) 262 assert(src[0].reg_offset >= 0); 263 if (src[1].file == GRF) 264 assert(src[1].reg_offset >= 0); 265 if (src[2].file == GRF) 266 assert(src[2].reg_offset >= 0); 267 } 268 269 bool equals(fs_inst *inst) 270 { 271 return (opcode == inst->opcode && 272 
dst.equals(&inst->dst) && 273 src[0].equals(&inst->src[0]) && 274 src[1].equals(&inst->src[1]) && 275 src[2].equals(&inst->src[2]) && 276 saturate == inst->saturate && 277 predicated == inst->predicated && 278 conditional_mod == inst->conditional_mod && 279 mlen == inst->mlen && 280 base_mrf == inst->base_mrf && 281 sampler == inst->sampler && 282 target == inst->target && 283 eot == inst->eot && 284 header_present == inst->header_present && 285 shadow_compare == inst->shadow_compare && 286 offset == inst->offset); 287 } 288 289 bool is_tex() 290 { 291 return (opcode == FS_OPCODE_TEX || 292 opcode == FS_OPCODE_TXB || 293 opcode == FS_OPCODE_TXD || 294 opcode == FS_OPCODE_TXL || 295 opcode == FS_OPCODE_TXS); 296 } 297 298 bool is_math() 299 { 300 return (opcode == SHADER_OPCODE_RCP || 301 opcode == SHADER_OPCODE_RSQ || 302 opcode == SHADER_OPCODE_SQRT || 303 opcode == SHADER_OPCODE_EXP2 || 304 opcode == SHADER_OPCODE_LOG2 || 305 opcode == SHADER_OPCODE_SIN || 306 opcode == SHADER_OPCODE_COS || 307 opcode == SHADER_OPCODE_POW); 308 } 309 310 enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 311 fs_reg dst; 312 fs_reg src[3]; 313 bool saturate; 314 bool predicated; 315 bool predicate_inverse; 316 int conditional_mod; /**< BRW_CONDITIONAL_* */ 317 318 int mlen; /**< SEND message length */ 319 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ 320 int sampler; 321 int target; /**< MRT target. */ 322 bool eot; 323 bool header_present; 324 bool shadow_compare; 325 bool force_uncompressed; 326 bool force_sechalf; 327 uint32_t offset; /* spill/unspill offset */ 328 329 /** @{ 330 * Annotation for the generated IR. One of the two can be set. 
331 */ 332 ir_instruction *ir; 333 const char *annotation; 334 /** @} */ 335}; 336 337class fs_visitor : public ir_visitor 338{ 339public: 340 341 fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, 342 struct brw_shader *shader) 343 { 344 this->c = c; 345 this->p = &c->func; 346 this->brw = p->brw; 347 this->fp = prog->FragmentProgram; 348 this->prog = prog; 349 this->intel = &brw->intel; 350 this->ctx = &intel->ctx; 351 this->mem_ctx = ralloc_context(NULL); 352 this->shader = shader; 353 this->failed = false; 354 this->variable_ht = hash_table_ctor(0, 355 hash_table_pointer_hash, 356 hash_table_pointer_compare); 357 358 /* There's a question that appears to be left open in the spec: 359 * How do implicit dst conversions interact with the CMP 360 * instruction or conditional mods? On gen6, the instruction: 361 * 362 * CMP null<d> src0<f> src1<f> 363 * 364 * will do src1 - src0 and compare that result as if it was an 365 * integer. On gen4, it will do src1 - src0 as float, convert 366 * the result to int, and compare as int. In between, it 367 * appears that it does src1 - src0 and does the compare in the 368 * execution type so dst type doesn't matter. 
369 */ 370 if (this->intel->gen > 4) 371 this->reg_null_cmp = reg_null_d; 372 else 373 this->reg_null_cmp = reg_null_f; 374 375 this->frag_color = NULL; 376 this->frag_data = NULL; 377 this->frag_depth = NULL; 378 this->first_non_payload_grf = 0; 379 380 this->current_annotation = NULL; 381 this->base_ir = NULL; 382 383 this->virtual_grf_sizes = NULL; 384 this->virtual_grf_next = 0; 385 this->virtual_grf_array_size = 0; 386 this->virtual_grf_def = NULL; 387 this->virtual_grf_use = NULL; 388 this->live_intervals_valid = false; 389 390 this->kill_emitted = false; 391 this->force_uncompressed_stack = 0; 392 this->force_sechalf_stack = 0; 393 } 394 395 ~fs_visitor() 396 { 397 ralloc_free(this->mem_ctx); 398 hash_table_dtor(this->variable_ht); 399 } 400 401 fs_reg *variable_storage(ir_variable *var); 402 int virtual_grf_alloc(int size); 403 void import_uniforms(fs_visitor *v); 404 405 void visit(ir_variable *ir); 406 void visit(ir_assignment *ir); 407 void visit(ir_dereference_variable *ir); 408 void visit(ir_dereference_record *ir); 409 void visit(ir_dereference_array *ir); 410 void visit(ir_expression *ir); 411 void visit(ir_texture *ir); 412 void visit(ir_if *ir); 413 void visit(ir_constant *ir); 414 void visit(ir_swizzle *ir); 415 void visit(ir_return *ir); 416 void visit(ir_loop *ir); 417 void visit(ir_loop_jump *ir); 418 void visit(ir_discard *ir); 419 void visit(ir_call *ir); 420 void visit(ir_function *ir); 421 void visit(ir_function_signature *ir); 422 423 void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler); 424 425 fs_inst *emit(fs_inst inst); 426 427 fs_inst *emit(enum opcode opcode) 428 { 429 return emit(fs_inst(opcode)); 430 } 431 432 fs_inst *emit(enum opcode opcode, fs_reg dst) 433 { 434 return emit(fs_inst(opcode, dst)); 435 } 436 437 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0) 438 { 439 return emit(fs_inst(opcode, dst, src0)); 440 } 441 442 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) 443 { 444 
return emit(fs_inst(opcode, dst, src0, src1)); 445 } 446 447 fs_inst *emit(enum opcode opcode, fs_reg dst, 448 fs_reg src0, fs_reg src1, fs_reg src2) 449 { 450 return emit(fs_inst(opcode, dst, src0, src1, src2)); 451 } 452 453 int type_size(const struct glsl_type *type); 454 455 bool run(); 456 void setup_paramvalues_refs(); 457 void assign_curb_setup(); 458 void calculate_urb_setup(); 459 void assign_urb_setup(); 460 bool assign_regs(); 461 void assign_regs_trivial(); 462 int choose_spill_reg(struct ra_graph *g); 463 void spill_reg(int spill_reg); 464 void split_virtual_grfs(); 465 void setup_pull_constants(); 466 void calculate_live_intervals(); 467 bool propagate_constants(); 468 bool opt_algebraic(); 469 bool register_coalesce(); 470 bool compute_to_mrf(); 471 bool dead_code_eliminate(); 472 bool remove_dead_constants(); 473 bool remove_duplicate_mrf_writes(); 474 bool virtual_grf_interferes(int a, int b); 475 void schedule_instructions(); 476 void fail(const char *msg, ...); 477 478 void push_force_uncompressed(); 479 void pop_force_uncompressed(); 480 void push_force_sechalf(); 481 void pop_force_sechalf(); 482 483 void generate_code(); 484 void generate_fb_write(fs_inst *inst); 485 void generate_pixel_xy(struct brw_reg dst, bool is_x); 486 void generate_linterp(fs_inst *inst, struct brw_reg dst, 487 struct brw_reg *src); 488 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 489 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); 490 void generate_discard(fs_inst *inst); 491 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 492 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 493 void generate_spill(fs_inst *inst, struct brw_reg src); 494 void generate_unspill(fs_inst *inst, struct brw_reg dst); 495 void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst); 496 497 void emit_dummy_fs(); 498 fs_reg *emit_fragcoord_interpolation(ir_variable *ir); 499 
fs_reg *emit_frontfacing_interpolation(ir_variable *ir); 500 fs_reg *emit_general_interpolation(ir_variable *ir); 501 void emit_interpolation_setup_gen4(); 502 void emit_interpolation_setup_gen6(); 503 fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 504 int sampler); 505 fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 506 int sampler); 507 fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 508 int sampler); 509 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); 510 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); 511 bool try_emit_saturate(ir_expression *ir); 512 void emit_bool_to_cond_code(ir_rvalue *condition); 513 void emit_if_gen6(ir_if *ir); 514 void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); 515 516 void emit_color_write(int index, int first_color_mrf, fs_reg color); 517 void emit_fb_writes(); 518 void emit_assignment_writes(fs_reg &l, fs_reg &r, 519 const glsl_type *type, bool predicated); 520 521 struct brw_reg interp_reg(int location, int channel); 522 int setup_uniform_values(int loc, const glsl_type *type); 523 void setup_builtin_uniform_values(ir_variable *ir); 524 int implied_mrf_writes(fs_inst *inst); 525 526 struct brw_context *brw; 527 const struct gl_fragment_program *fp; 528 struct intel_context *intel; 529 struct gl_context *ctx; 530 struct brw_wm_compile *c; 531 struct brw_compile *p; 532 struct brw_shader *shader; 533 struct gl_shader_program *prog; 534 void *mem_ctx; 535 exec_list instructions; 536 537 /* Delayed setup of c->prog_data.params[] due to realloc of 538 * ParamValues[] during compile. 
539 */ 540 int param_index[MAX_UNIFORMS * 4]; 541 int param_offset[MAX_UNIFORMS * 4]; 542 543 int *virtual_grf_sizes; 544 int virtual_grf_next; 545 int virtual_grf_array_size; 546 int *virtual_grf_def; 547 int *virtual_grf_use; 548 bool live_intervals_valid; 549 550 /* This is the map from UNIFORM hw_reg + reg_offset as generated by 551 * the visitor to the packed uniform number after 552 * remove_dead_constants() that represents the actual uploaded 553 * uniform index. 554 */ 555 int *params_remap; 556 557 struct hash_table *variable_ht; 558 ir_variable *frag_color, *frag_data, *frag_depth; 559 int first_non_payload_grf; 560 int urb_setup[FRAG_ATTRIB_MAX]; 561 bool kill_emitted; 562 563 /** @{ debug annotation info */ 564 const char *current_annotation; 565 ir_instruction *base_ir; 566 /** @} */ 567 568 bool failed; 569 char *fail_msg; 570 571 /* On entry to a visit() method, this is the storage for the 572 * result. On exit, the visit() called may have changed it, in 573 * which case the parent must use the new storage instead. 574 */ 575 fs_reg result; 576 577 fs_reg pixel_x; 578 fs_reg pixel_y; 579 fs_reg wpos_w; 580 fs_reg pixel_w; 581 fs_reg delta_x; 582 fs_reg delta_y; 583 fs_reg reg_null_cmp; 584 585 int grf_used; 586 587 int force_uncompressed_stack; 588 int force_sechalf_stack; 589}; 590 591GLboolean brw_do_channel_expressions(struct exec_list *instructions); 592GLboolean brw_do_vector_splitting(struct exec_list *instructions); 593bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); 594