brw_fs.h revision e4be665bbddcb6ddfd7b9b13f01152a97097b35c
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44#include "talloc.h" 45} 46#include "../glsl/glsl_types.h" 47#include "../glsl/ir.h" 48 49enum register_file { 50 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 51 GRF = BRW_GENERAL_REGISTER_FILE, 52 MRF = BRW_MESSAGE_REGISTER_FILE, 53 IMM = BRW_IMMEDIATE_VALUE, 54 FIXED_HW_REG, /* a struct brw_reg */ 55 UNIFORM, /* prog_data->params[hw_reg] */ 56 BAD_FILE 57}; 58 59enum fs_opcodes { 60 FS_OPCODE_FB_WRITE = 256, 61 FS_OPCODE_RCP, 62 FS_OPCODE_RSQ, 63 FS_OPCODE_SQRT, 64 FS_OPCODE_EXP2, 65 FS_OPCODE_LOG2, 66 FS_OPCODE_POW, 67 FS_OPCODE_SIN, 68 FS_OPCODE_COS, 69 FS_OPCODE_DDX, 70 FS_OPCODE_DDY, 71 FS_OPCODE_CINTERP, 72 FS_OPCODE_LINTERP, 73 FS_OPCODE_TEX, 74 FS_OPCODE_TXB, 75 FS_OPCODE_TXL, 76 FS_OPCODE_DISCARD_NOT, 77 FS_OPCODE_DISCARD_AND, 78 FS_OPCODE_SPILL, 79 FS_OPCODE_UNSPILL, 80 FS_OPCODE_PULL_CONSTANT_LOAD, 81}; 82 83 84class fs_reg { 85public: 86 /* Callers of this talloc-based new need not call delete. It's 87 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 88 static void* operator new(size_t size, void *ctx) 89 { 90 void *node; 91 92 node = talloc_size(ctx, size); 93 assert(node != NULL); 94 95 return node; 96 } 97 98 void init() 99 { 100 memset(this, 0, sizeof(*this)); 101 this->hw_reg = -1; 102 this->smear = -1; 103 } 104 105 /** Generic unset register constructor. */ 106 fs_reg() 107 { 108 init(); 109 this->file = BAD_FILE; 110 } 111 112 /** Immediate value constructor. */ 113 fs_reg(float f) 114 { 115 init(); 116 this->file = IMM; 117 this->type = BRW_REGISTER_TYPE_F; 118 this->imm.f = f; 119 } 120 121 /** Immediate value constructor. */ 122 fs_reg(int32_t i) 123 { 124 init(); 125 this->file = IMM; 126 this->type = BRW_REGISTER_TYPE_D; 127 this->imm.i = i; 128 } 129 130 /** Immediate value constructor. */ 131 fs_reg(uint32_t u) 132 { 133 init(); 134 this->file = IMM; 135 this->type = BRW_REGISTER_TYPE_UD; 136 this->imm.u = u; 137 } 138 139 /** Fixed brw_reg Immediate value constructor. */ 140 fs_reg(struct brw_reg fixed_hw_reg) 141 { 142 init(); 143 this->file = FIXED_HW_REG; 144 this->fixed_hw_reg = fixed_hw_reg; 145 this->type = fixed_hw_reg.type; 146 } 147 148 fs_reg(enum register_file file, int hw_reg); 149 fs_reg(enum register_file file, int hw_reg, uint32_t type); 150 fs_reg(class fs_visitor *v, const struct glsl_type *type); 151 152 bool equals(fs_reg *r) 153 { 154 return (file == r->file && 155 reg == r->reg && 156 reg_offset == r->reg_offset && 157 hw_reg == r->hw_reg && 158 type == r->type && 159 negate == r->negate && 160 abs == r->abs && 161 memcmp(&fixed_hw_reg, &r->fixed_hw_reg, 162 sizeof(fixed_hw_reg)) == 0 && 163 smear == r->smear && 164 imm.u == r->imm.u); 165 } 166 167 /** Register file: ARF, GRF, MRF, IMM. */ 168 enum register_file file; 169 /** virtual register number. 0 = fixed hw reg */ 170 int reg; 171 /** Offset within the virtual register. */ 172 int reg_offset; 173 /** HW register number. Generally unset until register allocation. */ 174 int hw_reg; 175 /** Register type. BRW_REGISTER_TYPE_* */ 176 int type; 177 bool negate; 178 bool abs; 179 struct brw_reg fixed_hw_reg; 180 int smear; /* -1, or a channel of the reg to smear to all channels. */ 181 182 /** Value for file == BRW_IMMMEDIATE_FILE */ 183 union { 184 int32_t i; 185 uint32_t u; 186 float f; 187 } imm; 188}; 189 190static const fs_reg reg_undef; 191static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); 192static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); 193 194class fs_inst : public exec_node { 195public: 196 /* Callers of this talloc-based new need not call delete. It's 197 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 198 static void* operator new(size_t size, void *ctx) 199 { 200 void *node; 201 202 node = talloc_zero_size(ctx, size); 203 assert(node != NULL); 204 205 return node; 206 } 207 208 void init() 209 { 210 memset(this, 0, sizeof(*this)); 211 this->opcode = BRW_OPCODE_NOP; 212 this->conditional_mod = BRW_CONDITIONAL_NONE; 213 214 this->dst = reg_undef; 215 this->src[0] = reg_undef; 216 this->src[1] = reg_undef; 217 this->src[2] = reg_undef; 218 } 219 220 fs_inst() 221 { 222 init(); 223 } 224 225 fs_inst(int opcode) 226 { 227 init(); 228 this->opcode = opcode; 229 } 230 231 fs_inst(int opcode, fs_reg dst) 232 { 233 init(); 234 this->opcode = opcode; 235 this->dst = dst; 236 237 if (dst.file == GRF) 238 assert(dst.reg_offset >= 0); 239 } 240 241 fs_inst(int opcode, fs_reg dst, fs_reg src0) 242 { 243 init(); 244 this->opcode = opcode; 245 this->dst = dst; 246 this->src[0] = src0; 247 248 if (dst.file == GRF) 249 assert(dst.reg_offset >= 0); 250 if (src[0].file == GRF) 251 assert(src[0].reg_offset >= 0); 252 } 253 254 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) 255 { 256 init(); 257 this->opcode = opcode; 258 this->dst = dst; 259 this->src[0] = src0; 260 this->src[1] = src1; 261 262 if (dst.file == GRF) 263 assert(dst.reg_offset >= 0); 264 if (src[0].file == GRF) 265 assert(src[0].reg_offset >= 0); 266 if (src[1].file == GRF) 267 assert(src[1].reg_offset >= 0); 268 } 269 270 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 271 { 272 init(); 273 this->opcode = opcode; 274 this->dst = dst; 275 this->src[0] = src0; 276 this->src[1] = src1; 277 this->src[2] = src2; 278 279 if (dst.file == GRF) 280 assert(dst.reg_offset >= 0); 281 if (src[0].file == GRF) 282 assert(src[0].reg_offset >= 0); 283 if (src[1].file == GRF) 284 assert(src[1].reg_offset >= 0); 285 if (src[2].file == GRF) 286 assert(src[2].reg_offset >= 0); 287 } 288 289 bool equals(fs_inst *inst) 290 { 291 return (opcode == inst->opcode && 292 dst.equals(&inst->dst) && 293 src[0].equals(&inst->src[0]) && 294 src[1].equals(&inst->src[1]) && 295 src[2].equals(&inst->src[2]) && 296 saturate == inst->saturate && 297 predicated == inst->predicated && 298 conditional_mod == inst->conditional_mod && 299 mlen == inst->mlen && 300 base_mrf == inst->base_mrf && 301 sampler == inst->sampler && 302 target == inst->target && 303 eot == inst->eot && 304 header_present == inst->header_present && 305 shadow_compare == inst->shadow_compare && 306 offset == inst->offset); 307 } 308 309 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 310 fs_reg dst; 311 fs_reg src[3]; 312 bool saturate; 313 bool predicated; 314 int conditional_mod; /**< BRW_CONDITIONAL_* */ 315 316 int mlen; /**< SEND message length */ 317 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ 318 int sampler; 319 int target; /**< MRT target. */ 320 bool eot; 321 bool header_present; 322 bool shadow_compare; 323 uint32_t offset; /* spill/unspill offset */ 324 325 /** @{ 326 * Annotation for the generated IR. One of the two can be set. 327 */ 328 ir_instruction *ir; 329 const char *annotation; 330 /** @} */ 331}; 332 333class fs_visitor : public ir_visitor 334{ 335public: 336 337 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) 338 { 339 this->c = c; 340 this->p = &c->func; 341 this->brw = p->brw; 342 this->fp = brw->fragment_program; 343 this->intel = &brw->intel; 344 this->ctx = &intel->ctx; 345 this->mem_ctx = talloc_new(NULL); 346 this->shader = shader; 347 this->fail = false; 348 this->variable_ht = hash_table_ctor(0, 349 hash_table_pointer_hash, 350 hash_table_pointer_compare); 351 352 /* There's a question that appears to be left open in the spec: 353 * How do implicit dst conversions interact with the CMP 354 * instruction or conditional mods? On gen6, the instruction: 355 * 356 * CMP null<d> src0<f> src1<f> 357 * 358 * will do src1 - src0 and compare that result as if it was an 359 * integer. On gen4, it will do src1 - src0 as float, convert 360 * the result to int, and compare as int. In between, it 361 * appears that it does src1 - src0 and does the compare in the 362 * execution type so dst type doesn't matter. 363 */ 364 if (this->intel->gen > 4) 365 this->reg_null_cmp = reg_null_d; 366 else 367 this->reg_null_cmp = reg_null_f; 368 369 this->frag_color = NULL; 370 this->frag_data = NULL; 371 this->frag_depth = NULL; 372 this->first_non_payload_grf = 0; 373 374 this->current_annotation = NULL; 375 this->base_ir = NULL; 376 377 this->virtual_grf_sizes = NULL; 378 this->virtual_grf_next = 1; 379 this->virtual_grf_array_size = 0; 380 this->virtual_grf_def = NULL; 381 this->virtual_grf_use = NULL; 382 this->live_intervals_valid = false; 383 384 this->kill_emitted = false; 385 } 386 387 ~fs_visitor() 388 { 389 talloc_free(this->mem_ctx); 390 hash_table_dtor(this->variable_ht); 391 } 392 393 fs_reg *variable_storage(ir_variable *var); 394 int virtual_grf_alloc(int size); 395 396 void visit(ir_variable *ir); 397 void visit(ir_assignment *ir); 398 void visit(ir_dereference_variable *ir); 399 void visit(ir_dereference_record *ir); 400 void visit(ir_dereference_array *ir); 401 void visit(ir_expression *ir); 402 void visit(ir_texture *ir); 403 void visit(ir_if *ir); 404 void visit(ir_constant *ir); 405 void visit(ir_swizzle *ir); 406 void visit(ir_return *ir); 407 void visit(ir_loop *ir); 408 void visit(ir_loop_jump *ir); 409 void visit(ir_discard *ir); 410 void visit(ir_call *ir); 411 void visit(ir_function *ir); 412 void visit(ir_function_signature *ir); 413 414 fs_inst *emit(fs_inst inst); 415 void setup_paramvalues_refs(); 416 void assign_curb_setup(); 417 void calculate_urb_setup(); 418 void assign_urb_setup(); 419 bool assign_regs(); 420 void assign_regs_trivial(); 421 int choose_spill_reg(struct ra_graph *g); 422 void spill_reg(int spill_reg); 423 void split_virtual_grfs(); 424 void setup_pull_constants(); 425 void calculate_live_intervals(); 426 bool propagate_constants(); 427 bool register_coalesce(); 428 bool compute_to_mrf(); 429 bool dead_code_eliminate(); 430 bool remove_duplicate_mrf_writes(); 431 bool virtual_grf_interferes(int a, int b); 432 void generate_code(); 433 void generate_fb_write(fs_inst *inst); 434 void generate_linterp(fs_inst *inst, struct brw_reg dst, 435 struct brw_reg *src); 436 void generate_tex(fs_inst *inst, struct brw_reg dst); 437 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); 438 void generate_discard_not(fs_inst *inst, struct brw_reg temp); 439 void generate_discard_and(fs_inst *inst, struct brw_reg temp); 440 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 441 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 442 void generate_spill(fs_inst *inst, struct brw_reg src); 443 void generate_unspill(fs_inst *inst, struct brw_reg dst); 444 void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst); 445 446 void emit_dummy_fs(); 447 fs_reg *emit_fragcoord_interpolation(ir_variable *ir); 448 fs_reg *emit_frontfacing_interpolation(ir_variable *ir); 449 fs_reg *emit_general_interpolation(ir_variable *ir); 450 void emit_interpolation_setup_gen4(); 451 void emit_interpolation_setup_gen6(); 452 fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate); 453 fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate); 454 fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); 455 fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); 456 bool try_emit_saturate(ir_expression *ir); 457 void emit_bool_to_cond_code(ir_rvalue *condition); 458 void emit_if_gen6(ir_if *ir); 459 void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); 460 461 void emit_fb_writes(); 462 void emit_assignment_writes(fs_reg &l, fs_reg &r, 463 const glsl_type *type, bool predicated); 464 465 struct brw_reg interp_reg(int location, int channel); 466 int setup_uniform_values(int loc, const glsl_type *type); 467 void setup_builtin_uniform_values(ir_variable *ir); 468 int implied_mrf_writes(fs_inst *inst); 469 470 struct brw_context *brw; 471 const struct gl_fragment_program *fp; 472 struct intel_context *intel; 473 struct gl_context *ctx; 474 struct brw_wm_compile *c; 475 struct brw_compile *p; 476 struct brw_shader *shader; 477 void *mem_ctx; 478 exec_list instructions; 479 480 /* Delayed setup of c->prog_data.params[] due to realloc of 481 * ParamValues[] during compile. 482 */ 483 int param_index[MAX_UNIFORMS * 4]; 484 int param_offset[MAX_UNIFORMS * 4]; 485 486 int *virtual_grf_sizes; 487 int virtual_grf_next; 488 int virtual_grf_array_size; 489 int *virtual_grf_def; 490 int *virtual_grf_use; 491 bool live_intervals_valid; 492 493 struct hash_table *variable_ht; 494 ir_variable *frag_color, *frag_data, *frag_depth; 495 int first_non_payload_grf; 496 int urb_setup[FRAG_ATTRIB_MAX]; 497 bool kill_emitted; 498 499 /** @{ debug annotation info */ 500 const char *current_annotation; 501 ir_instruction *base_ir; 502 /** @} */ 503 504 bool fail; 505 506 /* Result of last visit() method. */ 507 fs_reg result; 508 509 fs_reg pixel_x; 510 fs_reg pixel_y; 511 fs_reg wpos_w; 512 fs_reg pixel_w; 513 fs_reg delta_x; 514 fs_reg delta_y; 515 fs_reg reg_null_cmp; 516 517 int grf_used; 518}; 519 520GLboolean brw_do_channel_expressions(struct exec_list *instructions); 521GLboolean brw_do_vector_splitting(struct exec_list *instructions); 522