/* brw_fs.h revision 3b20f999bb7e9056e83ca09a842a9747d4ac1674 */
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44} 45#include "../glsl/glsl_types.h" 46#include "../glsl/ir.h" 47 48enum register_file { 49 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 50 GRF = BRW_GENERAL_REGISTER_FILE, 51 MRF = BRW_MESSAGE_REGISTER_FILE, 52 IMM = BRW_IMMEDIATE_VALUE, 53 FIXED_HW_REG, /* a struct brw_reg */ 54 UNIFORM, /* prog_data->params[hw_reg] */ 55 BAD_FILE 56}; 57 58enum fs_opcodes { 59 FS_OPCODE_FB_WRITE = 256, 60 FS_OPCODE_RCP, 61 FS_OPCODE_RSQ, 62 FS_OPCODE_SQRT, 63 FS_OPCODE_EXP2, 64 FS_OPCODE_LOG2, 65 FS_OPCODE_POW, 66 FS_OPCODE_SIN, 67 FS_OPCODE_COS, 68 FS_OPCODE_DDX, 69 FS_OPCODE_DDY, 70 FS_OPCODE_PIXEL_X, 71 FS_OPCODE_PIXEL_Y, 72 FS_OPCODE_CINTERP, 73 FS_OPCODE_LINTERP, 74 FS_OPCODE_TEX, 75 FS_OPCODE_TXB, 76 FS_OPCODE_TXD, 77 FS_OPCODE_TXL, 78 FS_OPCODE_DISCARD_NOT, 79 FS_OPCODE_DISCARD_AND, 80 FS_OPCODE_SPILL, 81 FS_OPCODE_UNSPILL, 82 FS_OPCODE_PULL_CONSTANT_LOAD, 83}; 84 85 86class fs_reg { 87public: 88 /* Callers of this ralloc-based new need not call delete. It's 89 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 90 static void* operator new(size_t size, void *ctx) 91 { 92 void *node; 93 94 node = ralloc_size(ctx, size); 95 assert(node != NULL); 96 97 return node; 98 } 99 100 void init() 101 { 102 memset(this, 0, sizeof(*this)); 103 this->hw_reg = -1; 104 this->smear = -1; 105 } 106 107 /** Generic unset register constructor. */ 108 fs_reg() 109 { 110 init(); 111 this->file = BAD_FILE; 112 } 113 114 /** Immediate value constructor. 
*/ 115 fs_reg(float f) 116 { 117 init(); 118 this->file = IMM; 119 this->type = BRW_REGISTER_TYPE_F; 120 this->imm.f = f; 121 } 122 123 /** Immediate value constructor. */ 124 fs_reg(int32_t i) 125 { 126 init(); 127 this->file = IMM; 128 this->type = BRW_REGISTER_TYPE_D; 129 this->imm.i = i; 130 } 131 132 /** Immediate value constructor. */ 133 fs_reg(uint32_t u) 134 { 135 init(); 136 this->file = IMM; 137 this->type = BRW_REGISTER_TYPE_UD; 138 this->imm.u = u; 139 } 140 141 /** Fixed brw_reg Immediate value constructor. */ 142 fs_reg(struct brw_reg fixed_hw_reg) 143 { 144 init(); 145 this->file = FIXED_HW_REG; 146 this->fixed_hw_reg = fixed_hw_reg; 147 this->type = fixed_hw_reg.type; 148 } 149 150 fs_reg(enum register_file file, int hw_reg); 151 fs_reg(enum register_file file, int hw_reg, uint32_t type); 152 fs_reg(class fs_visitor *v, const struct glsl_type *type); 153 154 bool equals(fs_reg *r) 155 { 156 return (file == r->file && 157 reg == r->reg && 158 reg_offset == r->reg_offset && 159 hw_reg == r->hw_reg && 160 type == r->type && 161 negate == r->negate && 162 abs == r->abs && 163 memcmp(&fixed_hw_reg, &r->fixed_hw_reg, 164 sizeof(fixed_hw_reg)) == 0 && 165 smear == r->smear && 166 imm.u == r->imm.u); 167 } 168 169 /** Register file: ARF, GRF, MRF, IMM. */ 170 enum register_file file; 171 /** virtual register number. 0 = fixed hw reg */ 172 int reg; 173 /** Offset within the virtual register. */ 174 int reg_offset; 175 /** HW register number. Generally unset until register allocation. */ 176 int hw_reg; 177 /** Register type. BRW_REGISTER_TYPE_* */ 178 int type; 179 bool negate; 180 bool abs; 181 bool sechalf; 182 struct brw_reg fixed_hw_reg; 183 int smear; /* -1, or a channel of the reg to smear to all channels. 
*/ 184 185 /** Value for file == BRW_IMMMEDIATE_FILE */ 186 union { 187 int32_t i; 188 uint32_t u; 189 float f; 190 } imm; 191}; 192 193static const fs_reg reg_undef; 194static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); 195static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); 196 197class fs_inst : public exec_node { 198public: 199 /* Callers of this ralloc-based new need not call delete. It's 200 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 201 static void* operator new(size_t size, void *ctx) 202 { 203 void *node; 204 205 node = rzalloc_size(ctx, size); 206 assert(node != NULL); 207 208 return node; 209 } 210 211 void init() 212 { 213 memset(this, 0, sizeof(*this)); 214 this->opcode = BRW_OPCODE_NOP; 215 this->conditional_mod = BRW_CONDITIONAL_NONE; 216 217 this->dst = reg_undef; 218 this->src[0] = reg_undef; 219 this->src[1] = reg_undef; 220 this->src[2] = reg_undef; 221 } 222 223 fs_inst() 224 { 225 init(); 226 } 227 228 fs_inst(int opcode) 229 { 230 init(); 231 this->opcode = opcode; 232 } 233 234 fs_inst(int opcode, fs_reg dst) 235 { 236 init(); 237 this->opcode = opcode; 238 this->dst = dst; 239 240 if (dst.file == GRF) 241 assert(dst.reg_offset >= 0); 242 } 243 244 fs_inst(int opcode, fs_reg dst, fs_reg src0) 245 { 246 init(); 247 this->opcode = opcode; 248 this->dst = dst; 249 this->src[0] = src0; 250 251 if (dst.file == GRF) 252 assert(dst.reg_offset >= 0); 253 if (src[0].file == GRF) 254 assert(src[0].reg_offset >= 0); 255 } 256 257 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) 258 { 259 init(); 260 this->opcode = opcode; 261 this->dst = dst; 262 this->src[0] = src0; 263 this->src[1] = src1; 264 265 if (dst.file == GRF) 266 assert(dst.reg_offset >= 0); 267 if (src[0].file == GRF) 268 assert(src[0].reg_offset >= 0); 269 if (src[1].file == GRF) 270 assert(src[1].reg_offset >= 0); 271 } 272 273 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 274 { 275 init(); 276 
this->opcode = opcode; 277 this->dst = dst; 278 this->src[0] = src0; 279 this->src[1] = src1; 280 this->src[2] = src2; 281 282 if (dst.file == GRF) 283 assert(dst.reg_offset >= 0); 284 if (src[0].file == GRF) 285 assert(src[0].reg_offset >= 0); 286 if (src[1].file == GRF) 287 assert(src[1].reg_offset >= 0); 288 if (src[2].file == GRF) 289 assert(src[2].reg_offset >= 0); 290 } 291 292 bool equals(fs_inst *inst) 293 { 294 return (opcode == inst->opcode && 295 dst.equals(&inst->dst) && 296 src[0].equals(&inst->src[0]) && 297 src[1].equals(&inst->src[1]) && 298 src[2].equals(&inst->src[2]) && 299 saturate == inst->saturate && 300 predicated == inst->predicated && 301 conditional_mod == inst->conditional_mod && 302 mlen == inst->mlen && 303 base_mrf == inst->base_mrf && 304 sampler == inst->sampler && 305 target == inst->target && 306 eot == inst->eot && 307 header_present == inst->header_present && 308 shadow_compare == inst->shadow_compare && 309 offset == inst->offset); 310 } 311 312 bool is_tex() 313 { 314 return (opcode == FS_OPCODE_TEX || 315 opcode == FS_OPCODE_TXB || 316 opcode == FS_OPCODE_TXD || 317 opcode == FS_OPCODE_TXL); 318 } 319 320 bool is_math() 321 { 322 return (opcode == FS_OPCODE_RCP || 323 opcode == FS_OPCODE_RSQ || 324 opcode == FS_OPCODE_SQRT || 325 opcode == FS_OPCODE_EXP2 || 326 opcode == FS_OPCODE_LOG2 || 327 opcode == FS_OPCODE_SIN || 328 opcode == FS_OPCODE_COS || 329 opcode == FS_OPCODE_POW); 330 } 331 332 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 333 fs_reg dst; 334 fs_reg src[3]; 335 bool saturate; 336 bool predicated; 337 bool predicate_inverse; 338 int conditional_mod; /**< BRW_CONDITIONAL_* */ 339 340 int mlen; /**< SEND message length */ 341 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ 342 int sampler; 343 int target; /**< MRT target. 
*/ 344 bool eot; 345 bool header_present; 346 bool shadow_compare; 347 bool force_uncompressed; 348 bool force_sechalf; 349 uint32_t offset; /* spill/unspill offset */ 350 351 /** @{ 352 * Annotation for the generated IR. One of the two can be set. 353 */ 354 ir_instruction *ir; 355 const char *annotation; 356 /** @} */ 357}; 358 359class fs_visitor : public ir_visitor 360{ 361public: 362 363 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) 364 { 365 this->c = c; 366 this->p = &c->func; 367 this->brw = p->brw; 368 this->fp = brw->fragment_program; 369 this->intel = &brw->intel; 370 this->ctx = &intel->ctx; 371 this->mem_ctx = ralloc_context(NULL); 372 this->shader = shader; 373 this->failed = false; 374 this->variable_ht = hash_table_ctor(0, 375 hash_table_pointer_hash, 376 hash_table_pointer_compare); 377 378 /* There's a question that appears to be left open in the spec: 379 * How do implicit dst conversions interact with the CMP 380 * instruction or conditional mods? On gen6, the instruction: 381 * 382 * CMP null<d> src0<f> src1<f> 383 * 384 * will do src1 - src0 and compare that result as if it was an 385 * integer. On gen4, it will do src1 - src0 as float, convert 386 * the result to int, and compare as int. In between, it 387 * appears that it does src1 - src0 and does the compare in the 388 * execution type so dst type doesn't matter. 
389 */ 390 if (this->intel->gen > 4) 391 this->reg_null_cmp = reg_null_d; 392 else 393 this->reg_null_cmp = reg_null_f; 394 395 this->frag_color = NULL; 396 this->frag_data = NULL; 397 this->frag_depth = NULL; 398 this->first_non_payload_grf = 0; 399 400 this->current_annotation = NULL; 401 this->base_ir = NULL; 402 403 this->virtual_grf_sizes = NULL; 404 this->virtual_grf_next = 1; 405 this->virtual_grf_array_size = 0; 406 this->virtual_grf_def = NULL; 407 this->virtual_grf_use = NULL; 408 this->live_intervals_valid = false; 409 410 this->kill_emitted = false; 411 this->force_uncompressed_stack = 0; 412 this->force_sechalf_stack = 0; 413 } 414 415 ~fs_visitor() 416 { 417 ralloc_free(this->mem_ctx); 418 hash_table_dtor(this->variable_ht); 419 } 420 421 fs_reg *variable_storage(ir_variable *var); 422 int virtual_grf_alloc(int size); 423 void import_uniforms(struct hash_table *src_variable_ht); 424 425 void visit(ir_variable *ir); 426 void visit(ir_assignment *ir); 427 void visit(ir_dereference_variable *ir); 428 void visit(ir_dereference_record *ir); 429 void visit(ir_dereference_array *ir); 430 void visit(ir_expression *ir); 431 void visit(ir_texture *ir); 432 void visit(ir_if *ir); 433 void visit(ir_constant *ir); 434 void visit(ir_swizzle *ir); 435 void visit(ir_return *ir); 436 void visit(ir_loop *ir); 437 void visit(ir_loop_jump *ir); 438 void visit(ir_discard *ir); 439 void visit(ir_call *ir); 440 void visit(ir_function *ir); 441 void visit(ir_function_signature *ir); 442 443 fs_inst *emit(fs_inst inst); 444 445 fs_inst *emit(int opcode) 446 { 447 return emit(fs_inst(opcode)); 448 } 449 450 fs_inst *emit(int opcode, fs_reg dst) 451 { 452 return emit(fs_inst(opcode, dst)); 453 } 454 455 fs_inst *emit(int opcode, fs_reg dst, fs_reg src0) 456 { 457 return emit(fs_inst(opcode, dst, src0)); 458 } 459 460 fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) 461 { 462 return emit(fs_inst(opcode, dst, src0, src1)); 463 } 464 465 fs_inst *emit(int opcode, 
fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 466 { 467 return emit(fs_inst(opcode, dst, src0, src1, src2)); 468 } 469 470 bool run(); 471 void setup_paramvalues_refs(); 472 void assign_curb_setup(); 473 void calculate_urb_setup(); 474 void assign_urb_setup(); 475 bool assign_regs(); 476 void assign_regs_trivial(); 477 int choose_spill_reg(struct ra_graph *g); 478 void spill_reg(int spill_reg); 479 void split_virtual_grfs(); 480 void setup_pull_constants(); 481 void calculate_live_intervals(); 482 bool propagate_constants(); 483 bool register_coalesce(); 484 bool compute_to_mrf(); 485 bool dead_code_eliminate(); 486 bool remove_duplicate_mrf_writes(); 487 bool virtual_grf_interferes(int a, int b); 488 void schedule_instructions(); 489 void fail(const char *msg, ...); 490 491 void push_force_uncompressed(); 492 void pop_force_uncompressed(); 493 void push_force_sechalf(); 494 void pop_force_sechalf(); 495 496 void generate_code(); 497 void generate_fb_write(fs_inst *inst); 498 void generate_pixel_xy(struct brw_reg dst, bool is_x); 499 void generate_linterp(fs_inst *inst, struct brw_reg dst, 500 struct brw_reg *src); 501 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 502 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); 503 void generate_discard_not(fs_inst *inst, struct brw_reg temp); 504 void generate_discard_and(fs_inst *inst, struct brw_reg temp); 505 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 506 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 507 void generate_spill(fs_inst *inst, struct brw_reg src); 508 void generate_unspill(fs_inst *inst, struct brw_reg dst); 509 void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst); 510 511 void emit_dummy_fs(); 512 fs_reg *emit_fragcoord_interpolation(ir_variable *ir); 513 fs_reg *emit_frontfacing_interpolation(ir_variable *ir); 514 fs_reg *emit_general_interpolation(ir_variable *ir); 515 void 
emit_interpolation_setup_gen4(); 516 void emit_interpolation_setup_gen6(); 517 fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate); 518 fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate); 519 fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); 520 fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); 521 bool try_emit_saturate(ir_expression *ir); 522 void emit_bool_to_cond_code(ir_rvalue *condition); 523 void emit_if_gen6(ir_if *ir); 524 void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); 525 526 void emit_color_write(int index, int first_color_mrf, fs_reg color); 527 void emit_fb_writes(); 528 void emit_assignment_writes(fs_reg &l, fs_reg &r, 529 const glsl_type *type, bool predicated); 530 531 struct brw_reg interp_reg(int location, int channel); 532 int setup_uniform_values(int loc, const glsl_type *type); 533 void setup_builtin_uniform_values(ir_variable *ir); 534 int implied_mrf_writes(fs_inst *inst); 535 536 struct brw_context *brw; 537 const struct gl_fragment_program *fp; 538 struct intel_context *intel; 539 struct gl_context *ctx; 540 struct brw_wm_compile *c; 541 struct brw_compile *p; 542 struct brw_shader *shader; 543 void *mem_ctx; 544 exec_list instructions; 545 546 /* Delayed setup of c->prog_data.params[] due to realloc of 547 * ParamValues[] during compile. 
548 */ 549 int param_index[MAX_UNIFORMS * 4]; 550 int param_offset[MAX_UNIFORMS * 4]; 551 552 int *virtual_grf_sizes; 553 int virtual_grf_next; 554 int virtual_grf_array_size; 555 int *virtual_grf_def; 556 int *virtual_grf_use; 557 bool live_intervals_valid; 558 559 struct hash_table *variable_ht; 560 ir_variable *frag_color, *frag_data, *frag_depth; 561 int first_non_payload_grf; 562 int urb_setup[FRAG_ATTRIB_MAX]; 563 bool kill_emitted; 564 565 /** @{ debug annotation info */ 566 const char *current_annotation; 567 ir_instruction *base_ir; 568 /** @} */ 569 570 bool failed; 571 572 /* Result of last visit() method. */ 573 fs_reg result; 574 575 fs_reg pixel_x; 576 fs_reg pixel_y; 577 fs_reg wpos_w; 578 fs_reg pixel_w; 579 fs_reg delta_x; 580 fs_reg delta_y; 581 fs_reg reg_null_cmp; 582 583 int grf_used; 584 585 int force_uncompressed_stack; 586 int force_sechalf_stack; 587}; 588 589GLboolean brw_do_channel_expressions(struct exec_list *instructions); 590GLboolean brw_do_vector_splitting(struct exec_list *instructions); 591