brw_fs.cpp revision 3bf8774e9c293fcad654d1bd67d4b43247b82f97
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44#include "talloc.h" 45} 46#include "../glsl/glsl_types.h" 47#include "../glsl/ir_optimization.h" 48#include "../glsl/ir_print_visitor.h" 49 50enum register_file { 51 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 52 GRF = BRW_GENERAL_REGISTER_FILE, 53 MRF = BRW_MESSAGE_REGISTER_FILE, 54 IMM = BRW_IMMEDIATE_VALUE, 55 FIXED_HW_REG, /* a struct brw_reg */ 56 UNIFORM, /* prog_data->params[hw_reg] */ 57 BAD_FILE 58}; 59 60enum fs_opcodes { 61 FS_OPCODE_FB_WRITE = 256, 62 FS_OPCODE_RCP, 63 FS_OPCODE_RSQ, 64 FS_OPCODE_SQRT, 65 FS_OPCODE_EXP2, 66 FS_OPCODE_LOG2, 67 FS_OPCODE_POW, 68 FS_OPCODE_SIN, 69 FS_OPCODE_COS, 70 FS_OPCODE_DDX, 71 FS_OPCODE_DDY, 72 FS_OPCODE_LINTERP, 73 FS_OPCODE_TEX, 74 FS_OPCODE_TXB, 75 FS_OPCODE_TXL, 76 FS_OPCODE_DISCARD, 77}; 78 79static int using_new_fs = -1; 80static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 81 82struct gl_shader * 83brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) 84{ 85 struct brw_shader *shader; 86 87 shader = talloc_zero(NULL, struct brw_shader); 88 if (shader) { 89 shader->base.Type = type; 90 shader->base.Name = name; 91 _mesa_init_shader(ctx, &shader->base); 92 } 93 94 return &shader->base; 95} 96 97struct gl_shader_program * 98brw_new_shader_program(GLcontext *ctx, GLuint name) 99{ 100 struct brw_shader_program *prog; 101 prog = talloc_zero(NULL, struct brw_shader_program); 102 if (prog) { 103 prog->base.Name = name; 104 _mesa_init_shader_program(ctx, &prog->base); 105 } 106 return &prog->base; 107} 108 109GLboolean 
110brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) 111{ 112 if (!_mesa_ir_compile_shader(ctx, shader)) 113 return GL_FALSE; 114 115 return GL_TRUE; 116} 117 118GLboolean 119brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) 120{ 121 if (using_new_fs == -1) 122 using_new_fs = getenv("INTEL_NEW_FS") != NULL; 123 124 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { 125 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; 126 127 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) { 128 void *mem_ctx = talloc_new(NULL); 129 bool progress; 130 131 if (shader->ir) 132 talloc_free(shader->ir); 133 shader->ir = new(shader) exec_list; 134 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 135 136 do_mat_op_to_vec(shader->ir); 137 do_mod_to_fract(shader->ir); 138 do_div_to_mul_rcp(shader->ir); 139 do_sub_to_add_neg(shader->ir); 140 do_explog_to_explog2(shader->ir); 141 do_lower_texture_projection(shader->ir); 142 143 do { 144 progress = false; 145 146 brw_do_channel_expressions(shader->ir); 147 brw_do_vector_splitting(shader->ir); 148 149 progress = do_lower_jumps(shader->ir, true, true, 150 true, /* main return */ 151 false, /* continue */ 152 false /* loops */ 153 ) || progress; 154 155 progress = do_common_optimization(shader->ir, true, 32) || progress; 156 157 progress = lower_noise(shader->ir) || progress; 158 progress = 159 lower_variable_index_to_cond_assign(shader->ir, 160 GL_TRUE, /* input */ 161 GL_TRUE, /* output */ 162 GL_TRUE, /* temp */ 163 GL_TRUE /* uniform */ 164 ) || progress; 165 } while (progress); 166 167 validate_ir_tree(shader->ir); 168 169 reparent_ir(shader->ir, shader->ir); 170 talloc_free(mem_ctx); 171 } 172 } 173 174 if (!_mesa_ir_link_shader(ctx, prog)) 175 return GL_FALSE; 176 177 return GL_TRUE; 178} 179 180static int 181type_size(const struct glsl_type *type) 182{ 183 unsigned int size, i; 184 185 switch (type->base_type) { 186 case GLSL_TYPE_UINT: 187 case GLSL_TYPE_INT: 188 
case GLSL_TYPE_FLOAT: 189 case GLSL_TYPE_BOOL: 190 return type->components(); 191 case GLSL_TYPE_ARRAY: 192 return type_size(type->fields.array) * type->length; 193 case GLSL_TYPE_STRUCT: 194 size = 0; 195 for (i = 0; i < type->length; i++) { 196 size += type_size(type->fields.structure[i].type); 197 } 198 return size; 199 case GLSL_TYPE_SAMPLER: 200 /* Samplers take up no register space, since they're baked in at 201 * link time. 202 */ 203 return 0; 204 default: 205 assert(!"not reached"); 206 return 0; 207 } 208} 209 210class fs_reg { 211public: 212 /* Callers of this talloc-based new need not call delete. It's 213 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 214 static void* operator new(size_t size, void *ctx) 215 { 216 void *node; 217 218 node = talloc_size(ctx, size); 219 assert(node != NULL); 220 221 return node; 222 } 223 224 void init() 225 { 226 this->reg = 0; 227 this->reg_offset = 0; 228 this->negate = 0; 229 this->abs = 0; 230 this->hw_reg = -1; 231 } 232 233 /** Generic unset register constructor. */ 234 fs_reg() 235 { 236 init(); 237 this->file = BAD_FILE; 238 } 239 240 /** Immediate value constructor. */ 241 fs_reg(float f) 242 { 243 init(); 244 this->file = IMM; 245 this->type = BRW_REGISTER_TYPE_F; 246 this->imm.f = f; 247 } 248 249 /** Immediate value constructor. */ 250 fs_reg(int32_t i) 251 { 252 init(); 253 this->file = IMM; 254 this->type = BRW_REGISTER_TYPE_D; 255 this->imm.i = i; 256 } 257 258 /** Immediate value constructor. */ 259 fs_reg(uint32_t u) 260 { 261 init(); 262 this->file = IMM; 263 this->type = BRW_REGISTER_TYPE_UD; 264 this->imm.u = u; 265 } 266 267 /** Fixed brw_reg Immediate value constructor. 
*/ 268 fs_reg(struct brw_reg fixed_hw_reg) 269 { 270 init(); 271 this->file = FIXED_HW_REG; 272 this->fixed_hw_reg = fixed_hw_reg; 273 this->type = fixed_hw_reg.type; 274 } 275 276 fs_reg(enum register_file file, int hw_reg); 277 fs_reg(class fs_visitor *v, const struct glsl_type *type); 278 279 /** Register file: ARF, GRF, MRF, IMM. */ 280 enum register_file file; 281 /** virtual register number. 0 = fixed hw reg */ 282 int reg; 283 /** Offset within the virtual register. */ 284 int reg_offset; 285 /** HW register number. Generally unset until register allocation. */ 286 int hw_reg; 287 /** Register type. BRW_REGISTER_TYPE_* */ 288 int type; 289 bool negate; 290 bool abs; 291 struct brw_reg fixed_hw_reg; 292 293 /** Value for file == BRW_IMMMEDIATE_FILE */ 294 union { 295 int32_t i; 296 uint32_t u; 297 float f; 298 } imm; 299}; 300 301static const fs_reg reg_undef; 302static const fs_reg reg_null(ARF, BRW_ARF_NULL); 303 304class fs_inst : public exec_node { 305public: 306 /* Callers of this talloc-based new need not call delete. It's 307 * easier to just talloc_free 'ctx' (or any of its ancestors). 
*/ 308 static void* operator new(size_t size, void *ctx) 309 { 310 void *node; 311 312 node = talloc_zero_size(ctx, size); 313 assert(node != NULL); 314 315 return node; 316 } 317 318 void init() 319 { 320 this->opcode = BRW_OPCODE_NOP; 321 this->saturate = false; 322 this->conditional_mod = BRW_CONDITIONAL_NONE; 323 this->predicated = false; 324 this->sampler = 0; 325 this->target = 0; 326 this->eot = false; 327 this->shadow_compare = false; 328 } 329 330 fs_inst() 331 { 332 init(); 333 } 334 335 fs_inst(int opcode) 336 { 337 init(); 338 this->opcode = opcode; 339 } 340 341 fs_inst(int opcode, fs_reg dst, fs_reg src0) 342 { 343 init(); 344 this->opcode = opcode; 345 this->dst = dst; 346 this->src[0] = src0; 347 } 348 349 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) 350 { 351 init(); 352 this->opcode = opcode; 353 this->dst = dst; 354 this->src[0] = src0; 355 this->src[1] = src1; 356 } 357 358 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 359 { 360 init(); 361 this->opcode = opcode; 362 this->dst = dst; 363 this->src[0] = src0; 364 this->src[1] = src1; 365 this->src[2] = src2; 366 } 367 368 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 369 fs_reg dst; 370 fs_reg src[3]; 371 bool saturate; 372 bool predicated; 373 int conditional_mod; /**< BRW_CONDITIONAL_* */ 374 375 int mlen; /**< SEND message length */ 376 int sampler; 377 int target; /**< MRT target. */ 378 bool eot; 379 bool shadow_compare; 380 381 /** @{ 382 * Annotation for the generated IR. One of the two can be set. 
383 */ 384 ir_instruction *ir; 385 const char *annotation; 386 /** @} */ 387}; 388 389class fs_visitor : public ir_visitor 390{ 391public: 392 393 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) 394 { 395 this->c = c; 396 this->p = &c->func; 397 this->brw = p->brw; 398 this->fp = brw->fragment_program; 399 this->intel = &brw->intel; 400 this->ctx = &intel->ctx; 401 this->mem_ctx = talloc_new(NULL); 402 this->shader = shader; 403 this->fail = false; 404 this->variable_ht = hash_table_ctor(0, 405 hash_table_pointer_hash, 406 hash_table_pointer_compare); 407 408 this->frag_color = NULL; 409 this->frag_data = NULL; 410 this->frag_depth = NULL; 411 this->first_non_payload_grf = 0; 412 413 this->current_annotation = NULL; 414 this->annotation_string = NULL; 415 this->annotation_ir = NULL; 416 this->base_ir = NULL; 417 418 this->virtual_grf_sizes = NULL; 419 this->virtual_grf_next = 1; 420 this->virtual_grf_array_size = 0; 421 this->virtual_grf_def = NULL; 422 this->virtual_grf_use = NULL; 423 } 424 ~fs_visitor() 425 { 426 talloc_free(this->mem_ctx); 427 hash_table_dtor(this->variable_ht); 428 } 429 430 fs_reg *variable_storage(ir_variable *var); 431 int virtual_grf_alloc(int size); 432 433 void visit(ir_variable *ir); 434 void visit(ir_assignment *ir); 435 void visit(ir_dereference_variable *ir); 436 void visit(ir_dereference_record *ir); 437 void visit(ir_dereference_array *ir); 438 void visit(ir_expression *ir); 439 void visit(ir_texture *ir); 440 void visit(ir_if *ir); 441 void visit(ir_constant *ir); 442 void visit(ir_swizzle *ir); 443 void visit(ir_return *ir); 444 void visit(ir_loop *ir); 445 void visit(ir_loop_jump *ir); 446 void visit(ir_discard *ir); 447 void visit(ir_call *ir); 448 void visit(ir_function *ir); 449 void visit(ir_function_signature *ir); 450 451 fs_inst *emit(fs_inst inst); 452 void assign_curb_setup(); 453 void calculate_urb_setup(); 454 void assign_urb_setup(); 455 void assign_regs(); 456 void assign_regs_trivial(); 457 void 
calculate_live_intervals(); 458 bool propagate_constants(); 459 bool dead_code_eliminate(); 460 bool virtual_grf_interferes(int a, int b); 461 void generate_code(); 462 void generate_fb_write(fs_inst *inst); 463 void generate_linterp(fs_inst *inst, struct brw_reg dst, 464 struct brw_reg *src); 465 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 466 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); 467 void generate_discard(fs_inst *inst, struct brw_reg temp); 468 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 469 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 470 471 void emit_dummy_fs(); 472 void emit_fragcoord_interpolation(ir_variable *ir); 473 void emit_general_interpolation(ir_variable *ir); 474 void emit_interpolation_setup_gen4(); 475 void emit_interpolation_setup_gen6(); 476 fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate); 477 fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate); 478 void emit_fb_writes(); 479 void emit_assignment_writes(fs_reg &l, fs_reg &r, 480 const glsl_type *type, bool predicated); 481 482 struct brw_reg interp_reg(int location, int channel); 483 int setup_uniform_values(int loc, const glsl_type *type); 484 void setup_builtin_uniform_values(ir_variable *ir); 485 486 struct brw_context *brw; 487 const struct gl_fragment_program *fp; 488 struct intel_context *intel; 489 GLcontext *ctx; 490 struct brw_wm_compile *c; 491 struct brw_compile *p; 492 struct brw_shader *shader; 493 void *mem_ctx; 494 exec_list instructions; 495 496 int *virtual_grf_sizes; 497 int virtual_grf_next; 498 int virtual_grf_array_size; 499 int *virtual_grf_def; 500 int *virtual_grf_use; 501 502 struct hash_table *variable_ht; 503 ir_variable *frag_color, *frag_data, *frag_depth; 504 int first_non_payload_grf; 505 int urb_setup[FRAG_ATTRIB_MAX]; 506 507 /** @{ debug annotation info */ 508 const char 
*current_annotation; 509 ir_instruction *base_ir; 510 const char **annotation_string; 511 ir_instruction **annotation_ir; 512 /** @} */ 513 514 bool fail; 515 516 /* Result of last visit() method. */ 517 fs_reg result; 518 519 fs_reg pixel_x; 520 fs_reg pixel_y; 521 fs_reg wpos_w; 522 fs_reg pixel_w; 523 fs_reg delta_x; 524 fs_reg delta_y; 525 526 int grf_used; 527 528}; 529 530int 531fs_visitor::virtual_grf_alloc(int size) 532{ 533 if (virtual_grf_array_size <= virtual_grf_next) { 534 if (virtual_grf_array_size == 0) 535 virtual_grf_array_size = 16; 536 else 537 virtual_grf_array_size *= 2; 538 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, 539 int, virtual_grf_array_size); 540 541 /* This slot is always unused. */ 542 virtual_grf_sizes[0] = 0; 543 } 544 virtual_grf_sizes[virtual_grf_next] = size; 545 return virtual_grf_next++; 546} 547 548/** Fixed HW reg constructor. */ 549fs_reg::fs_reg(enum register_file file, int hw_reg) 550{ 551 init(); 552 this->file = file; 553 this->hw_reg = hw_reg; 554 this->type = BRW_REGISTER_TYPE_F; 555} 556 557int 558brw_type_for_base_type(const struct glsl_type *type) 559{ 560 switch (type->base_type) { 561 case GLSL_TYPE_FLOAT: 562 return BRW_REGISTER_TYPE_F; 563 case GLSL_TYPE_INT: 564 case GLSL_TYPE_BOOL: 565 return BRW_REGISTER_TYPE_D; 566 case GLSL_TYPE_UINT: 567 return BRW_REGISTER_TYPE_UD; 568 case GLSL_TYPE_ARRAY: 569 case GLSL_TYPE_STRUCT: 570 /* These should be overridden with the type of the member when 571 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 572 * way to trip up if we don't. 573 */ 574 return BRW_REGISTER_TYPE_UD; 575 default: 576 assert(!"not reached"); 577 return BRW_REGISTER_TYPE_F; 578 } 579} 580 581/** Automatic reg constructor. 
*/ 582fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 583{ 584 init(); 585 586 this->file = GRF; 587 this->reg = v->virtual_grf_alloc(type_size(type)); 588 this->reg_offset = 0; 589 this->type = brw_type_for_base_type(type); 590} 591 592fs_reg * 593fs_visitor::variable_storage(ir_variable *var) 594{ 595 return (fs_reg *)hash_table_find(this->variable_ht, var); 596} 597 598/* Our support for uniforms is piggy-backed on the struct 599 * gl_fragment_program, because that's where the values actually 600 * get stored, rather than in some global gl_shader_program uniform 601 * store. 602 */ 603int 604fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 605{ 606 unsigned int offset = 0; 607 float *vec_values; 608 609 if (type->is_matrix()) { 610 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 611 type->vector_elements, 612 1); 613 614 for (unsigned int i = 0; i < type->matrix_columns; i++) { 615 offset += setup_uniform_values(loc + offset, column); 616 } 617 618 return offset; 619 } 620 621 switch (type->base_type) { 622 case GLSL_TYPE_FLOAT: 623 case GLSL_TYPE_UINT: 624 case GLSL_TYPE_INT: 625 case GLSL_TYPE_BOOL: 626 vec_values = fp->Base.Parameters->ParameterValues[loc]; 627 for (unsigned int i = 0; i < type->vector_elements; i++) { 628 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 629 } 630 return 1; 631 632 case GLSL_TYPE_STRUCT: 633 for (unsigned int i = 0; i < type->length; i++) { 634 offset += setup_uniform_values(loc + offset, 635 type->fields.structure[i].type); 636 } 637 return offset; 638 639 case GLSL_TYPE_ARRAY: 640 for (unsigned int i = 0; i < type->length; i++) { 641 offset += setup_uniform_values(loc + offset, type->fields.array); 642 } 643 return offset; 644 645 case GLSL_TYPE_SAMPLER: 646 /* The sampler takes up a slot, but we don't use any values from it. 
*/ 647 return 1; 648 649 default: 650 assert(!"not reached"); 651 return 0; 652 } 653} 654 655 656/* Our support for builtin uniforms is even scarier than non-builtin. 657 * It sits on top of the PROG_STATE_VAR parameters that are 658 * automatically updated from GL context state. 659 */ 660void 661fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 662{ 663 const struct gl_builtin_uniform_desc *statevar = NULL; 664 665 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 666 statevar = &_mesa_builtin_uniform_desc[i]; 667 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 668 break; 669 } 670 671 if (!statevar->name) { 672 this->fail = true; 673 printf("Failed to find builtin uniform `%s'\n", ir->name); 674 return; 675 } 676 677 int array_count; 678 if (ir->type->is_array()) { 679 array_count = ir->type->length; 680 } else { 681 array_count = 1; 682 } 683 684 for (int a = 0; a < array_count; a++) { 685 for (unsigned int i = 0; i < statevar->num_elements; i++) { 686 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 687 int tokens[STATE_LENGTH]; 688 689 memcpy(tokens, element->tokens, sizeof(element->tokens)); 690 if (ir->type->is_array()) { 691 tokens[1] = a; 692 } 693 694 /* This state reference has already been setup by ir_to_mesa, 695 * but we'll get the same index back here. 696 */ 697 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 698 (gl_state_index *)tokens); 699 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 700 701 /* Add each of the unique swizzles of the element as a 702 * parameter. This'll end up matching the expected layout of 703 * the array/matrix/structure we're trying to fill in. 
704 */ 705 int last_swiz = -1; 706 for (unsigned int i = 0; i < 4; i++) { 707 int swiz = GET_SWZ(element->swizzle, i); 708 if (swiz == last_swiz) 709 break; 710 last_swiz = swiz; 711 712 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; 713 } 714 } 715 } 716} 717 718void 719fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 720{ 721 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 722 fs_reg wpos = *reg; 723 fs_reg neg_y = this->pixel_y; 724 neg_y.negate = true; 725 726 /* gl_FragCoord.x */ 727 if (ir->pixel_center_integer) { 728 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 729 } else { 730 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 731 } 732 wpos.reg_offset++; 733 734 /* gl_FragCoord.y */ 735 if (ir->origin_upper_left && ir->pixel_center_integer) { 736 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 737 } else { 738 fs_reg pixel_y = this->pixel_y; 739 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 740 741 if (!ir->origin_upper_left) { 742 pixel_y.negate = true; 743 offset += c->key.drawable_height - 1.0; 744 } 745 746 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 747 } 748 wpos.reg_offset++; 749 750 /* gl_FragCoord.z */ 751 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 752 interp_reg(FRAG_ATTRIB_WPOS, 2))); 753 wpos.reg_offset++; 754 755 /* gl_FragCoord.w: Already set up in emit_interpolation */ 756 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 757 758 hash_table_insert(this->variable_ht, reg, ir); 759} 760 761 762void 763fs_visitor::emit_general_interpolation(ir_variable *ir) 764{ 765 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 766 /* Interpolation is always in floating point regs. 
*/ 767 reg->type = BRW_REGISTER_TYPE_F; 768 fs_reg attr = *reg; 769 770 unsigned int array_elements; 771 const glsl_type *type; 772 773 if (ir->type->is_array()) { 774 array_elements = ir->type->length; 775 if (array_elements == 0) { 776 this->fail = true; 777 } 778 type = ir->type->fields.array; 779 } else { 780 array_elements = 1; 781 type = ir->type; 782 } 783 784 int location = ir->location; 785 for (unsigned int i = 0; i < array_elements; i++) { 786 for (unsigned int j = 0; j < type->matrix_columns; j++) { 787 if (urb_setup[location] == -1) { 788 /* If there's no incoming setup data for this slot, don't 789 * emit interpolation for it. 790 */ 791 attr.reg_offset += type->vector_elements; 792 location++; 793 continue; 794 } 795 796 for (unsigned int c = 0; c < type->vector_elements; c++) { 797 struct brw_reg interp = interp_reg(location, c); 798 emit(fs_inst(FS_OPCODE_LINTERP, 799 attr, 800 this->delta_x, 801 this->delta_y, 802 fs_reg(interp))); 803 attr.reg_offset++; 804 } 805 attr.reg_offset -= type->vector_elements; 806 807 for (unsigned int c = 0; c < type->vector_elements; c++) { 808 emit(fs_inst(BRW_OPCODE_MUL, 809 attr, 810 attr, 811 this->pixel_w)); 812 attr.reg_offset++; 813 } 814 location++; 815 } 816 } 817 818 hash_table_insert(this->variable_ht, reg, ir); 819} 820 821void 822fs_visitor::visit(ir_variable *ir) 823{ 824 fs_reg *reg = NULL; 825 826 if (variable_storage(ir)) 827 return; 828 829 if (strcmp(ir->name, "gl_FragColor") == 0) { 830 this->frag_color = ir; 831 } else if (strcmp(ir->name, "gl_FragData") == 0) { 832 this->frag_data = ir; 833 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 834 this->frag_depth = ir; 835 } 836 837 if (ir->mode == ir_var_in) { 838 if (!strcmp(ir->name, "gl_FragCoord")) { 839 emit_fragcoord_interpolation(ir); 840 return; 841 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 842 reg = new(this->mem_ctx) fs_reg(this, ir->type); 843 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 844 /* bit 
31 is "primitive is back face", so checking < (1 << 31) gives 845 * us front face 846 */ 847 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 848 *reg, 849 fs_reg(r1_6ud), 850 fs_reg(1u << 31))); 851 inst->conditional_mod = BRW_CONDITIONAL_L; 852 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 853 } else { 854 emit_general_interpolation(ir); 855 return; 856 } 857 } 858 859 if (ir->mode == ir_var_uniform) { 860 int param_index = c->prog_data.nr_params; 861 862 if (!strncmp(ir->name, "gl_", 3)) { 863 setup_builtin_uniform_values(ir); 864 } else { 865 setup_uniform_values(ir->location, ir->type); 866 } 867 868 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 869 } 870 871 if (!reg) 872 reg = new(this->mem_ctx) fs_reg(this, ir->type); 873 874 hash_table_insert(this->variable_ht, reg, ir); 875} 876 877void 878fs_visitor::visit(ir_dereference_variable *ir) 879{ 880 fs_reg *reg = variable_storage(ir->var); 881 this->result = *reg; 882} 883 884void 885fs_visitor::visit(ir_dereference_record *ir) 886{ 887 const glsl_type *struct_type = ir->record->type; 888 889 ir->record->accept(this); 890 891 unsigned int offset = 0; 892 for (unsigned int i = 0; i < struct_type->length; i++) { 893 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 894 break; 895 offset += type_size(struct_type->fields.structure[i].type); 896 } 897 this->result.reg_offset += offset; 898 this->result.type = brw_type_for_base_type(ir->type); 899} 900 901void 902fs_visitor::visit(ir_dereference_array *ir) 903{ 904 ir_constant *index; 905 int element_size; 906 907 ir->array->accept(this); 908 index = ir->array_index->as_constant(); 909 910 element_size = type_size(ir->type); 911 this->result.type = brw_type_for_base_type(ir->type); 912 913 if (index) { 914 assert(this->result.file == UNIFORM || 915 (this->result.file == GRF && 916 this->result.reg != 0)); 917 this->result.reg_offset += index->value.i[0] * element_size; 918 } else { 919 assert(!"FINISHME: non-constant array element"); 
920 } 921} 922 923void 924fs_visitor::visit(ir_expression *ir) 925{ 926 unsigned int operand; 927 fs_reg op[2], temp; 928 fs_reg result; 929 fs_inst *inst; 930 931 for (operand = 0; operand < ir->get_num_operands(); operand++) { 932 ir->operands[operand]->accept(this); 933 if (this->result.file == BAD_FILE) { 934 ir_print_visitor v; 935 printf("Failed to get tree for expression operand:\n"); 936 ir->operands[operand]->accept(&v); 937 this->fail = true; 938 } 939 op[operand] = this->result; 940 941 /* Matrix expression operands should have been broken down to vector 942 * operations already. 943 */ 944 assert(!ir->operands[operand]->type->is_matrix()); 945 /* And then those vector operands should have been broken down to scalar. 946 */ 947 assert(!ir->operands[operand]->type->is_vector()); 948 } 949 950 /* Storage for our result. If our result goes into an assignment, it will 951 * just get copy-propagated out, so no worries. 952 */ 953 this->result = fs_reg(this, ir->type); 954 955 switch (ir->operation) { 956 case ir_unop_logic_not: 957 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 958 break; 959 case ir_unop_neg: 960 op[0].negate = !op[0].negate; 961 this->result = op[0]; 962 break; 963 case ir_unop_abs: 964 op[0].abs = true; 965 this->result = op[0]; 966 break; 967 case ir_unop_sign: 968 temp = fs_reg(this, ir->type); 969 970 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 971 972 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 973 inst->conditional_mod = BRW_CONDITIONAL_G; 974 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 975 inst->predicated = true; 976 977 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 978 inst->conditional_mod = BRW_CONDITIONAL_L; 979 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 980 inst->predicated = true; 981 982 break; 983 case ir_unop_rcp: 984 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0])); 985 break; 986 987 case 
ir_unop_exp2: 988 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0])); 989 break; 990 case ir_unop_log2: 991 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0])); 992 break; 993 case ir_unop_exp: 994 case ir_unop_log: 995 assert(!"not reached: should be handled by ir_explog_to_explog2"); 996 break; 997 case ir_unop_sin: 998 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0])); 999 break; 1000 case ir_unop_cos: 1001 emit(fs_inst(FS_OPCODE_COS, this->result, op[0])); 1002 break; 1003 1004 case ir_unop_dFdx: 1005 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 1006 break; 1007 case ir_unop_dFdy: 1008 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 1009 break; 1010 1011 case ir_binop_add: 1012 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 1013 break; 1014 case ir_binop_sub: 1015 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 1016 break; 1017 1018 case ir_binop_mul: 1019 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 1020 break; 1021 case ir_binop_div: 1022 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1023 break; 1024 case ir_binop_mod: 1025 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1026 break; 1027 1028 case ir_binop_less: 1029 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1030 inst->conditional_mod = BRW_CONDITIONAL_L; 1031 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1032 break; 1033 case ir_binop_greater: 1034 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1035 inst->conditional_mod = BRW_CONDITIONAL_G; 1036 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1037 break; 1038 case ir_binop_lequal: 1039 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1040 inst->conditional_mod = BRW_CONDITIONAL_LE; 1041 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1042 break; 1043 case ir_binop_gequal: 1044 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], 
op[1])); 1045 inst->conditional_mod = BRW_CONDITIONAL_GE; 1046 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1047 break; 1048 case ir_binop_equal: 1049 case ir_binop_all_equal: /* same as nequal for scalars */ 1050 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1051 inst->conditional_mod = BRW_CONDITIONAL_Z; 1052 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1053 break; 1054 case ir_binop_nequal: 1055 case ir_binop_any_nequal: /* same as nequal for scalars */ 1056 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1057 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1058 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1059 break; 1060 1061 case ir_binop_logic_xor: 1062 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 1063 break; 1064 1065 case ir_binop_logic_or: 1066 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 1067 break; 1068 1069 case ir_binop_logic_and: 1070 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 1071 break; 1072 1073 case ir_binop_dot: 1074 case ir_binop_cross: 1075 case ir_unop_any: 1076 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 1077 break; 1078 1079 case ir_unop_noise: 1080 assert(!"not reached: should be handled by lower_noise"); 1081 break; 1082 1083 case ir_unop_sqrt: 1084 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0])); 1085 break; 1086 1087 case ir_unop_rsq: 1088 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0])); 1089 break; 1090 1091 case ir_unop_i2f: 1092 case ir_unop_b2f: 1093 case ir_unop_b2i: 1094 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 1095 break; 1096 case ir_unop_f2i: 1097 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 1098 break; 1099 case ir_unop_f2b: 1100 case ir_unop_i2b: 1101 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 1102 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1103 1104 case ir_unop_trunc: 1105 
emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1106 break; 1107 case ir_unop_ceil: 1108 op[0].negate = ~op[0].negate; 1109 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1110 this->result.negate = true; 1111 break; 1112 case ir_unop_floor: 1113 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1114 break; 1115 case ir_unop_fract: 1116 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 1117 break; 1118 1119 case ir_binop_min: 1120 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1121 inst->conditional_mod = BRW_CONDITIONAL_L; 1122 1123 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1124 inst->predicated = true; 1125 break; 1126 case ir_binop_max: 1127 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1128 inst->conditional_mod = BRW_CONDITIONAL_G; 1129 1130 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1131 inst->predicated = true; 1132 break; 1133 1134 case ir_binop_pow: 1135 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1])); 1136 break; 1137 1138 case ir_unop_bit_not: 1139 case ir_unop_u2f: 1140 case ir_binop_lshift: 1141 case ir_binop_rshift: 1142 case ir_binop_bit_and: 1143 case ir_binop_bit_xor: 1144 case ir_binop_bit_or: 1145 assert(!"GLSL 1.30 features unsupported"); 1146 break; 1147 } 1148} 1149 1150void 1151fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 1152 const glsl_type *type, bool predicated) 1153{ 1154 switch (type->base_type) { 1155 case GLSL_TYPE_FLOAT: 1156 case GLSL_TYPE_UINT: 1157 case GLSL_TYPE_INT: 1158 case GLSL_TYPE_BOOL: 1159 for (unsigned int i = 0; i < type->components(); i++) { 1160 l.type = brw_type_for_base_type(type); 1161 r.type = brw_type_for_base_type(type); 1162 1163 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1164 inst->predicated = predicated; 1165 1166 l.reg_offset++; 1167 r.reg_offset++; 1168 } 1169 break; 1170 case GLSL_TYPE_ARRAY: 1171 for (unsigned int i = 0; i < type->length; i++) { 
1172 emit_assignment_writes(l, r, type->fields.array, predicated); 1173 } 1174 1175 case GLSL_TYPE_STRUCT: 1176 for (unsigned int i = 0; i < type->length; i++) { 1177 emit_assignment_writes(l, r, type->fields.structure[i].type, 1178 predicated); 1179 } 1180 break; 1181 1182 case GLSL_TYPE_SAMPLER: 1183 break; 1184 1185 default: 1186 assert(!"not reached"); 1187 break; 1188 } 1189} 1190 1191void 1192fs_visitor::visit(ir_assignment *ir) 1193{ 1194 struct fs_reg l, r; 1195 fs_inst *inst; 1196 1197 /* FINISHME: arrays on the lhs */ 1198 ir->lhs->accept(this); 1199 l = this->result; 1200 1201 ir->rhs->accept(this); 1202 r = this->result; 1203 1204 assert(l.file != BAD_FILE); 1205 assert(r.file != BAD_FILE); 1206 1207 if (ir->condition) { 1208 /* Get the condition bool into the predicate. */ 1209 ir->condition->accept(this); 1210 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0))); 1211 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1212 } 1213 1214 if (ir->lhs->type->is_scalar() || 1215 ir->lhs->type->is_vector()) { 1216 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 1217 if (ir->write_mask & (1 << i)) { 1218 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1219 if (ir->condition) 1220 inst->predicated = true; 1221 r.reg_offset++; 1222 } 1223 l.reg_offset++; 1224 } 1225 } else { 1226 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 1227 } 1228} 1229 1230fs_inst * 1231fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1232{ 1233 int mlen; 1234 int base_mrf = 2; 1235 bool simd16 = false; 1236 fs_reg orig_dst; 1237 1238 if (ir->shadow_comparitor) { 1239 for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { 1240 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1241 coordinate)); 1242 coordinate.reg_offset++; 1243 } 1244 /* gen4's SIMD8 sampler always has the slots for u,v,r present. 
*/ 1245 mlen = 3; 1246 1247 if (ir->op == ir_tex) { 1248 /* There's no plain shadow compare message, so we use shadow 1249 * compare with a bias of 0.0. 1250 */ 1251 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1252 fs_reg(0.0f))); 1253 mlen++; 1254 } else if (ir->op == ir_txb) { 1255 ir->lod_info.bias->accept(this); 1256 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1257 this->result)); 1258 mlen++; 1259 } else { 1260 assert(ir->op == ir_txl); 1261 ir->lod_info.lod->accept(this); 1262 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1263 this->result)); 1264 mlen++; 1265 } 1266 1267 ir->shadow_comparitor->accept(this); 1268 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1269 mlen++; 1270 } else if (ir->op == ir_tex) { 1271 for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { 1272 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1273 coordinate)); 1274 coordinate.reg_offset++; 1275 } 1276 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ 1277 mlen = 3; 1278 } else { 1279 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod 1280 * instructions. We'll need to do SIMD16 here. 1281 */ 1282 assert(ir->op == ir_txb || ir->op == ir_txl); 1283 1284 for (mlen = 0; mlen < ir->coordinate->type->vector_elements * 2;) { 1285 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1286 coordinate)); 1287 coordinate.reg_offset++; 1288 mlen++; 1289 1290 /* The unused upper half. */ 1291 mlen++; 1292 } 1293 1294 /* lod/bias appears after u/v/r. */ 1295 mlen = 6; 1296 1297 if (ir->op == ir_txb) { 1298 ir->lod_info.bias->accept(this); 1299 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1300 this->result)); 1301 mlen++; 1302 } else { 1303 ir->lod_info.lod->accept(this); 1304 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1305 this->result)); 1306 mlen++; 1307 } 1308 1309 /* The unused upper half. 
*/ 1310 mlen++; 1311 1312 /* Now, since we're doing simd16, the return is 2 interleaved 1313 * vec4s where the odd-indexed ones are junk. We'll need to move 1314 * this weirdness around to the expected layout. 1315 */ 1316 simd16 = true; 1317 orig_dst = dst; 1318 dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 1319 2)); 1320 dst.type = BRW_REGISTER_TYPE_F; 1321 } 1322 1323 fs_inst *inst = NULL; 1324 switch (ir->op) { 1325 case ir_tex: 1326 inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf))); 1327 break; 1328 case ir_txb: 1329 inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf))); 1330 break; 1331 case ir_txl: 1332 inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf))); 1333 break; 1334 case ir_txd: 1335 case ir_txf: 1336 assert(!"GLSL 1.30 features unsupported"); 1337 break; 1338 } 1339 inst->mlen = mlen; 1340 1341 if (simd16) { 1342 for (int i = 0; i < 4; i++) { 1343 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst)); 1344 orig_dst.reg_offset++; 1345 dst.reg_offset += 2; 1346 } 1347 } 1348 1349 return inst; 1350} 1351 1352fs_inst * 1353fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1354{ 1355 /* gen5's SIMD8 sampler has slots for u, v, r, array index, then 1356 * optional parameters like shadow comparitor or LOD bias. If 1357 * optional parameters aren't present, those base slots are 1358 * optional and don't need to be included in the message. 1359 * 1360 * We don't fill in the unnecessary slots regardless, which may 1361 * look surprising in the disassembly. 
1362 */ 1363 int mlen; 1364 int base_mrf = 2; 1365 1366 for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { 1367 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate)); 1368 coordinate.reg_offset++; 1369 } 1370 1371 if (ir->shadow_comparitor) { 1372 mlen = MAX2(mlen, 4); 1373 1374 ir->shadow_comparitor->accept(this); 1375 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1376 mlen++; 1377 } 1378 1379 fs_inst *inst = NULL; 1380 switch (ir->op) { 1381 case ir_tex: 1382 inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf))); 1383 break; 1384 case ir_txb: 1385 ir->lod_info.bias->accept(this); 1386 mlen = MAX2(mlen, 4); 1387 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1388 mlen++; 1389 1390 inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf))); 1391 break; 1392 case ir_txl: 1393 ir->lod_info.lod->accept(this); 1394 mlen = MAX2(mlen, 4); 1395 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1396 mlen++; 1397 1398 inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf))); 1399 break; 1400 case ir_txd: 1401 case ir_txf: 1402 assert(!"GLSL 1.30 features unsupported"); 1403 break; 1404 } 1405 inst->mlen = mlen; 1406 1407 return inst; 1408} 1409 1410void 1411fs_visitor::visit(ir_texture *ir) 1412{ 1413 fs_inst *inst = NULL; 1414 1415 ir->coordinate->accept(this); 1416 fs_reg coordinate = this->result; 1417 1418 /* Should be lowered by do_lower_texture_projection */ 1419 assert(!ir->projector); 1420 1421 /* Writemasking doesn't eliminate channels on SIMD8 texture 1422 * samples, so don't worry about them. 
1423 */ 1424 fs_reg dst = fs_reg(this, glsl_type::vec4_type); 1425 1426 if (intel->gen < 5) { 1427 inst = emit_texture_gen4(ir, dst, coordinate); 1428 } else { 1429 inst = emit_texture_gen5(ir, dst, coordinate); 1430 } 1431 1432 inst->sampler = 1433 _mesa_get_sampler_uniform_value(ir->sampler, 1434 ctx->Shader.CurrentProgram, 1435 &brw->fragment_program->Base); 1436 inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler]; 1437 1438 this->result = dst; 1439 1440 if (ir->shadow_comparitor) 1441 inst->shadow_compare = true; 1442 1443 if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { 1444 fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); 1445 1446 for (int i = 0; i < 4; i++) { 1447 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1448 fs_reg l = swizzle_dst; 1449 l.reg_offset += i; 1450 1451 if (swiz == SWIZZLE_ZERO) { 1452 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f))); 1453 } else if (swiz == SWIZZLE_ONE) { 1454 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f))); 1455 } else { 1456 fs_reg r = dst; 1457 r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); 1458 emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1459 } 1460 } 1461 this->result = swizzle_dst; 1462 } 1463} 1464 1465void 1466fs_visitor::visit(ir_swizzle *ir) 1467{ 1468 ir->val->accept(this); 1469 fs_reg val = this->result; 1470 1471 if (ir->type->vector_elements == 1) { 1472 this->result.reg_offset += ir->mask.x; 1473 return; 1474 } 1475 1476 fs_reg result = fs_reg(this, ir->type); 1477 this->result = result; 1478 1479 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1480 fs_reg channel = val; 1481 int swiz = 0; 1482 1483 switch (i) { 1484 case 0: 1485 swiz = ir->mask.x; 1486 break; 1487 case 1: 1488 swiz = ir->mask.y; 1489 break; 1490 case 2: 1491 swiz = ir->mask.z; 1492 break; 1493 case 3: 1494 swiz = ir->mask.w; 1495 break; 1496 } 1497 1498 channel.reg_offset += swiz; 1499 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 1500 result.reg_offset++; 1501 } 1502} 1503 
void
fs_visitor::visit(ir_discard *ir)
{
   /* Temporary used by generate_discard() when building the kill mask. */
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD, temp, temp));
}

void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   /* One immediate MOV per vector component. */
   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_INT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_BOOL:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         /* Initialize the loop counter from the "from" expression. */
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      /* Compare the counter against the "to" bound and emit a predicated
       * BREAK at the top of the loop body.
       */
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
                                   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;
      case ir_binop_nequal:
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      case ir_binop_gequal:
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         break;
      case ir_binop_lequal:
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         break;
      case ir_binop_greater:
         inst->conditional_mod = BRW_CONDITIONAL_G;
         break;
      case ir_binop_less:
         inst->conditional_mod = BRW_CONDITIONAL_L;
         break;
      default:
         assert(!"not reached: unknown loop condition");
         this->fail = true;
         break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

/**
 * Copies @inst into the instruction stream, tagging it with the current
 * annotation string and source IR node for later debug output.
 */
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color. */
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 2),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 3),
                fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 4),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 5),
                fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
                        fs_reg(0),
                        fs_reg(0)));
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   /* Each attribute's 4 setup channels occupy two registers, two channels
    * per register, at byte offsets 0 and 16 (stride in dwords * 4).
    */
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs.
 */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* Add the per-subspan X/Y offset vectors to the subspan origins held
    * in the g1 payload register.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      /* PLN reads delta_x/delta_y from adjacent registers, so allocate
       * them as one vec2 and point delta_y at its second half.
       */
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs.
 */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute 1/pos.w";
   /* On gen6, W and the bary deltas arrive pre-computed in the payload. */
   this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));

   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
   this->delta_y = fs_reg(brw_vec8_grf(3, 0));

   this->current_annotation = NULL;
}

/**
 * Builds the FB write message payload (AA stencil, color, depth) in the
 * MRF and emits one FS_OPCODE_FB_WRITE per color region.
 */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   int nr = 0;

   /* m0, m1 header */
   nr += 2;

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
         /* Pass through the payload depth. */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      if (this->frag_color || this->frag_data) {
         for (int i = 0; i < 4; i++) {
            emit(fs_inst(BRW_OPCODE_MOV,
                         fs_reg(MRF, color_mrf + i),
                         color));
            color.reg_offset++;
         }
      }

      /* gl_FragColor broadcasts the same value to every target, so rewind;
       * gl_FragData advances to the next target's color.
       */
      if (this->frag_color)
         color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      /* Only the last FB write carries end-of-thread. */
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
   }

   if (c->key.nr_color_regions == 0) {
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
   }

   this->current_annotation = NULL;
}

void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;

   /* Header is 2 regs, g0 and g1 are the contents.  g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
           brw_message_reg(1),
           brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                0, /* base MRF */
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
                inst->target,
                inst->mlen,
                0,
                eot);
}

void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   /* PLN needs the deltas in consecutive registers (and even-aligned
    * before gen6); otherwise fall back to LINE+MAC.
    */
   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

void
fs_visitor::generate_math(fs_inst *inst,
                          struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   /* POW's second operand goes in the message payload after the first. */
   if (inst->opcode == FS_OPCODE_POW) {
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
            BRW_MATH_SATURATE_NONE,
            2, src[0],
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}

void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

   if (intel->gen == 5) {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
         }
         break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         /* Note that G45 and older determines shadow compare and dispatch width
          * from message length for most messages.
          */
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         if (inst->shadow_compare) {
            assert(inst->mlen == 5);
         } else {
            assert(inst->mlen <= 6);
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(inst->mlen == 5);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         } else {
            /* Non-shadow bias is only available as a SIMD16 message here;
             * emit_texture_gen4() built the interleaved payload for it.
             */
            assert(inst->mlen == 8);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      }
   }
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      rlen = 8;
      dst = vec16(dst);
   }

   /* g0 header. */
   src.nr--;

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              src.nr,
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(inst->sampler),
              inst->sampler,
              WRITEMASK_XYZW,
              msg_type,
              rlen,
              inst->mlen + 1,
              0,
              1,
              simd_mode);
}


/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 *  arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *           DDX                     DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But for DDY, it's harder, as we want to produce the pairs swizzled
 * between each other.
 We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   temp = brw_uw1_reg(temp.file, temp.nr, 0);

   /* Clear the channel-enable bits in the g0 header so later FB writes
    * don't touch the discarded pixels.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, temp, g0);
   brw_pop_insn_state(p);
}

void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
            int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
            /* Eight constants per CURBE register. */
            struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
                                                  constant_nr / 8,
                                                  constant_nr % 8);

            inst->src[i].file = FIXED_HW_REG;
            inst->src[i].fixed_hw_reg = brw_reg;
         }
      }
   }
}

void
fs_visitor::calculate_urb_setup()
{
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      urb_setup[i] = -1;
   }

   int urb_next = 0;
   /* Figure out where each of the incoming setup attributes lands. */
   if (intel->gen >= 6) {
      for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
         if (i == FRAG_ATTRIB_WPOS ||
             (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i))) {
            urb_setup[i] = urb_next++;
         }
      }
   } else {
      /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
      for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
         if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
            int fp_index;

            if (i >= VERT_RESULT_VAR0)
               fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
            else if (i <= VERT_RESULT_TEX7)
               fp_index = i;
            else
               fp_index = -1;

            if (fp_index >= 0)
               urb_setup[fp_index] = urb_next++;
         }
      }
   }

   /* Each attribute is 4 setup channels, each of which is half a reg. */
   c->prog_data.urb_read_length = urb_next * 2;
}

void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;

   /* Offset all the urb_setup[] index by the actual position of the
    * setup regs, now that the location of the constants has been chosen.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
         continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      inst->src[2].fixed_hw_reg.nr += urb_start;
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}

/* Rewrite a virtual GRF reference to its chosen hardware register. */
static void
assign_reg(int *reg_hw_locations, fs_reg *reg)
{
   if (reg->file == GRF && reg->reg != 0) {
      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
      reg->reg = 0;
   }
}

/* Fallback allocator: lay out virtual GRFs contiguously with no reuse. */
void
fs_visitor::assign_regs_trivial()
{
   int last_grf = 0;
   int hw_reg_mapping[this->virtual_grf_next];
   int i;

   hw_reg_mapping[0] = 0;
   hw_reg_mapping[1] = this->first_non_payload_grf;
   for (i = 2; i < this->virtual_grf_next; i++) {
      hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
                           this->virtual_grf_sizes[i - 1]);
   }
   last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      assign_reg(hw_reg_mapping, &inst->dst);
      assign_reg(hw_reg_mapping, &inst->src[0]);
      assign_reg(hw_reg_mapping, &inst->src[1]);
   }

   this->grf_used = last_grf + 1;
}

void
fs_visitor::assign_regs()
{
   int last_grf = 0;
   int hw_reg_mapping[this->virtual_grf_next + 1];
   int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
   int class_sizes[base_reg_count];
   int class_count = 0;
   int aligned_pair_class = -1;

   /* Set up the register classes.
    *
    * The base registers store a scalar value.  For texture samples,
    * we get virtual GRFs composed of 4 contiguous hw register.  For
    * structures and arrays, we store them as contiguous larger things
    * than that, though we should be able to do better most of the
    * time.
    */
   class_sizes[class_count++] = 1;
   if (brw->has_pln && intel->gen < 6) {
      /* Always set up the (unaligned) pairs for gen5, so we can find
       * them for making the aligned pair class.
       */
      class_sizes[class_count++] = 2;
   }
   for (int r = 1; r < this->virtual_grf_next; r++) {
      int i;

      for (i = 0; i < class_count; i++) {
         if (class_sizes[i] == this->virtual_grf_sizes[r])
            break;
      }
      if (i == class_count) {
         if (this->virtual_grf_sizes[r] >= base_reg_count) {
            fprintf(stderr, "Object too large to register allocate.\n");
            this->fail = true;
         }

         class_sizes[class_count++] = this->virtual_grf_sizes[r];
      }
   }

   int ra_reg_count = 0;
   int class_base_reg[class_count];
   int class_reg_count[class_count];
   int classes[class_count + 1];

   for (int i = 0; i < class_count; i++) {
      class_base_reg[i] = ra_reg_count;
      class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
      ra_reg_count += class_reg_count[i];
   }

   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
   for (int i = 0; i < class_count; i++) {
      classes[i] = ra_alloc_reg_class(regs);

      for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
         ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
      }

      /* Add conflicts between our contiguous registers aliasing
       * base regs and other register classes' contiguous registers
       * that alias base regs, or the base regs themselves for classes[0].
       */
      for (int c = 0; c <= i; c++) {
         for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
            for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
                 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
                 c_r++) {

               if (0) {
                  printf("%d/%d conflicts %d/%d\n",
                         class_sizes[i], this->first_non_payload_grf + i_r,
                         class_sizes[c], this->first_non_payload_grf + c_r);
               }

               ra_add_reg_conflict(regs,
                                   class_base_reg[i] + i_r,
                                   class_base_reg[c] + c_r);
            }
         }
      }
   }

   /* Add a special class for aligned pairs, which we'll put delta_x/y
    * in on gen5 so that we can do PLN.
    */
   if (brw->has_pln && intel->gen < 6) {
      int reg_count = (base_reg_count - 1) / 2;
      int unaligned_pair_class = 1;
      assert(class_sizes[unaligned_pair_class] == 2);

      aligned_pair_class = class_count;
      classes[aligned_pair_class] = ra_alloc_reg_class(regs);
      class_base_reg[aligned_pair_class] = 0;
      class_reg_count[aligned_pair_class] = 0;
      int start = (this->first_non_payload_grf & 1) ? 1 : 0;

      for (int i = 0; i < reg_count; i++) {
         ra_class_add_reg(regs, classes[aligned_pair_class],
                          class_base_reg[unaligned_pair_class] + i * 2 + start);
      }
      class_count++;
   }

   ra_set_finalize(regs);

   struct ra_graph *g = ra_alloc_interference_graph(regs,
                                                    this->virtual_grf_next);
   /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
    * with nodes.
    */
   ra_set_node_class(g, 0, classes[0]);

   for (int i = 1; i < this->virtual_grf_next; i++) {
      for (int c = 0; c < class_count; c++) {
         if (class_sizes[c] == this->virtual_grf_sizes[i]) {
            if (aligned_pair_class >= 0 &&
                this->delta_x.reg == i) {
               ra_set_node_class(g, i, classes[aligned_pair_class]);
            } else {
               ra_set_node_class(g, i, classes[c]);
            }
            break;
         }
      }

      for (int j = 1; j < i; j++) {
         if (virtual_grf_interferes(i, j)) {
            ra_add_node_interference(g, i, j);
         }
      }
   }

   /* FINISHME: Handle spilling */
   if (!ra_allocate_no_spills(g)) {
      fprintf(stderr, "Failed to allocate registers.\n");
      this->fail = true;
      return;
   }

   /* Get the chosen virtual registers for each node, and map virtual
    * regs in the register classes back down to real hardware reg
    * numbers.
    */
   hw_reg_mapping[0] = 0; /* unused */
   for (int i = 1; i < this->virtual_grf_next; i++) {
      int reg = ra_get_node_reg(g, i);
      int hw_reg = -1;

      for (int c = 0; c < class_count; c++) {
         if (reg >= class_base_reg[c] &&
             reg < class_base_reg[c] + class_reg_count[c]) {
            hw_reg = reg - class_base_reg[c];
            break;
         }
      }

      assert(hw_reg != -1);
      hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
      last_grf = MAX2(last_grf,
                      hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      assign_reg(hw_reg_mapping, &inst->dst);
      assign_reg(hw_reg_mapping, &inst->src[0]);
      assign_reg(hw_reg_mapping, &inst->src[1]);
   }

   this->grf_used = last_grf + 1;

   talloc_free(g);
   talloc_free(regs);
}

void
fs_visitor::calculate_live_intervals()
{
   int num_vars = this->virtual_grf_next;
   int *def = talloc_array(mem_ctx, int, num_vars);
2468 int *use = talloc_array(mem_ctx, int, num_vars); 2469 int loop_depth = 0; 2470 int loop_start = 0; 2471 2472 for (int i = 0; i < num_vars; i++) { 2473 def[i] = 1 << 30; 2474 use[i] = -1; 2475 } 2476 2477 int ip = 0; 2478 foreach_iter(exec_list_iterator, iter, this->instructions) { 2479 fs_inst *inst = (fs_inst *)iter.get(); 2480 2481 if (inst->opcode == BRW_OPCODE_DO) { 2482 if (loop_depth++ == 0) 2483 loop_start = ip; 2484 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2485 loop_depth--; 2486 2487 if (loop_depth == 0) { 2488 /* FINISHME: 2489 * 2490 * Patches up any vars marked for use within the loop as 2491 * live until the end. This is conservative, as there 2492 * will often be variables defined and used inside the 2493 * loop but dead at the end of the loop body. 2494 */ 2495 for (int i = 0; i < num_vars; i++) { 2496 if (use[i] == loop_start) { 2497 use[i] = ip; 2498 } 2499 } 2500 } 2501 } else { 2502 int eip = ip; 2503 2504 if (loop_depth) 2505 eip = loop_start; 2506 2507 for (unsigned int i = 0; i < 3; i++) { 2508 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2509 use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip); 2510 } 2511 } 2512 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2513 def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip); 2514 } 2515 } 2516 2517 ip++; 2518 } 2519 2520 talloc_free(this->virtual_grf_def); 2521 talloc_free(this->virtual_grf_use); 2522 this->virtual_grf_def = def; 2523 this->virtual_grf_use = use; 2524} 2525 2526/** 2527 * Attempts to move immediate constants into the immediate 2528 * constant slot of following instructions. 
2529 * 2530 * Immediate constants are a bit tricky -- they have to be in the last 2531 * operand slot, you can't do abs/negate on them, 2532 */ 2533 2534bool 2535fs_visitor::propagate_constants() 2536{ 2537 bool progress = false; 2538 2539 return false; 2540 2541 foreach_iter(exec_list_iterator, iter, this->instructions) { 2542 fs_inst *inst = (fs_inst *)iter.get(); 2543 2544 if (inst->opcode != BRW_OPCODE_MOV || 2545 inst->predicated || 2546 inst->dst.file != GRF || inst->src[0].file != IMM || 2547 inst->dst.type != inst->src[0].type) 2548 continue; 2549 2550 /* Don't bother with cases where we should have had the 2551 * operation on the constant folded in GLSL already. 2552 */ 2553 if (inst->saturate) 2554 continue; 2555 2556 /* Found a move of a constant to a GRF. Find anything else using the GRF 2557 * before it's written, and replace it with the constant if we can. 2558 */ 2559 exec_list_iterator scan_iter = iter; 2560 scan_iter.next(); 2561 for (; scan_iter.has_next(); scan_iter.next()) { 2562 fs_inst *scan_inst = (fs_inst *)scan_iter.get(); 2563 2564 if (scan_inst->opcode == BRW_OPCODE_DO || 2565 scan_inst->opcode == BRW_OPCODE_WHILE || 2566 scan_inst->opcode == BRW_OPCODE_ELSE || 2567 scan_inst->opcode == BRW_OPCODE_ENDIF) { 2568 break; 2569 } 2570 2571 for (int i = 2; i >= 0; i--) { 2572 if (scan_inst->src[i].file != GRF || 2573 scan_inst->src[i].reg != inst->dst.reg || 2574 scan_inst->src[i].reg_offset != inst->dst.reg_offset) 2575 continue; 2576 2577 /* Don't bother with cases where we should have had the 2578 * operation on the constant folded in GLSL already. 
2579 */ 2580 if (scan_inst->src[i].negate || scan_inst->src[i].abs) 2581 continue; 2582 2583 switch (scan_inst->opcode) { 2584 case BRW_OPCODE_MOV: 2585 scan_inst->src[i] = inst->src[0]; 2586 progress = true; 2587 break; 2588 2589 case BRW_OPCODE_MUL: 2590 case BRW_OPCODE_ADD: 2591 if (i == 1) { 2592 scan_inst->src[i] = inst->src[0]; 2593 progress = true; 2594 } else if (i == 0 && scan_inst->src[1].file != IMM) { 2595 /* Fit this constant in by commuting the operands */ 2596 scan_inst->src[0] = scan_inst->src[1]; 2597 scan_inst->src[1] = inst->src[0]; 2598 } 2599 break; 2600 } 2601 } 2602 2603 if (scan_inst->dst.file == GRF && 2604 scan_inst->dst.reg == inst->dst.reg && 2605 (scan_inst->dst.reg_offset == inst->dst.reg_offset || 2606 scan_inst->opcode == FS_OPCODE_TEX)) { 2607 break; 2608 } 2609 } 2610 } 2611 2612 return progress; 2613} 2614/** 2615 * Must be called after calculate_live_intervales() to remove unused 2616 * writes to registers -- register allocation will fail otherwise 2617 * because something deffed but not used won't be considered to 2618 * interfere with other regs. 2619 */ 2620bool 2621fs_visitor::dead_code_eliminate() 2622{ 2623 bool progress = false; 2624 int num_vars = this->virtual_grf_next; 2625 bool dead[num_vars]; 2626 2627 for (int i = 0; i < num_vars; i++) { 2628 /* This would be ">=", but FS_OPCODE_DISCARD has a src == dst where 2629 * it writes dst then reads it as src. 2630 */ 2631 dead[i] = this->virtual_grf_def[i] > this->virtual_grf_use[i]; 2632 2633 if (dead[i]) { 2634 /* Mark off its interval so it won't interfere with anything. 
	  */
	 this->virtual_grf_def[i] = -1;
	 this->virtual_grf_use[i] = -1;
      }
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      /* Delete every instruction whose GRF destination is never read. */
      if (inst->dst.file == GRF && dead[inst->dst.reg]) {
	 inst->remove();
	 progress = true;
      }
   }

   return progress;
}

/* Returns whether the live ranges of virtual GRFs a and b overlap, i.e.
 * whether they need separate hardware registers.  Dead registers
 * (use == -1, set by dead_code_eliminate()) only conflict if their def
 * point falls inside the other range.
 */
bool
fs_visitor::virtual_grf_interferes(int a, int b)
{
   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);

   /* For dead code, just check if the def interferes with the other range. */
   if (this->virtual_grf_use[a] == -1) {
      return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
	      this->virtual_grf_def[a] < this->virtual_grf_use[b]);
   }
   if (this->virtual_grf_use[b] == -1) {
      return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
	      this->virtual_grf_def[b] < this->virtual_grf_use[a]);
   }

   return start <= end;
}

/* Translate a post-register-allocation fs_reg into the struct brw_reg
 * form the brw_eu code emitter consumes, applying abs/negate modifiers.
 */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      /* Register allocation has already run: hw_reg is final. */
      brw_reg = brw_vec8_reg(reg->file,
			     reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
	 brw_reg = brw_imm_f(reg->imm.f);
	 break;
      case BRW_REGISTER_TYPE_D:
	 brw_reg = brw_imm_d(reg->imm.i);
	 break;
      case BRW_REGISTER_TYPE_UD:
	 brw_reg = brw_imm_ud(reg->imm.u);
	 break;
      default:
	 assert(!"not reached");
	 break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused.
       */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      /* Uniforms are expected to have been lowered before code emission. */
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}

/* Walk the FS IR and emit native gen instructions through the brw_eu
 * layer, tracking IF/ELSE/ENDIF and DO/WHILE nesting for flow-control
 * patch-up, and recording per-native-instruction annotations for
 * debug disassembly.
 */
void
fs_visitor::generate_code()
{
   unsigned int annotation_len = 0;
   int last_native_inst = 0;
   struct brw_instruction *if_stack[16], *loop_stack[16];
   int if_stack_depth = 0, loop_stack_depth = 0;
   /* Number of IFs open inside the current loop nesting level, needed
    * for BREAK/CONT pop counts.
    */
   int if_depth_in_loop[16];

   if_depth_in_loop[loop_stack_depth] = 0;

   memset(&if_stack, 0, sizeof(if_stack));
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();
      struct brw_reg src[3], dst;

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 assert(if_stack_depth < 16);
if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 2784 if_depth_in_loop[loop_stack_depth]++; 2785 if_stack_depth++; 2786 break; 2787 case BRW_OPCODE_ELSE: 2788 if_stack[if_stack_depth - 1] = 2789 brw_ELSE(p, if_stack[if_stack_depth - 1]); 2790 break; 2791 case BRW_OPCODE_ENDIF: 2792 if_stack_depth--; 2793 brw_ENDIF(p , if_stack[if_stack_depth]); 2794 if_depth_in_loop[loop_stack_depth]--; 2795 break; 2796 2797 case BRW_OPCODE_DO: 2798 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 2799 if_depth_in_loop[loop_stack_depth] = 0; 2800 break; 2801 2802 case BRW_OPCODE_BREAK: 2803 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 2804 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2805 break; 2806 case BRW_OPCODE_CONTINUE: 2807 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 2808 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2809 break; 2810 2811 case BRW_OPCODE_WHILE: { 2812 struct brw_instruction *inst0, *inst1; 2813 GLuint br = 1; 2814 2815 if (intel->gen >= 5) 2816 br = 2; 2817 2818 assert(loop_stack_depth > 0); 2819 loop_stack_depth--; 2820 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 2821 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 2822 while (inst0 > loop_stack[loop_stack_depth]) { 2823 inst0--; 2824 if (inst0->header.opcode == BRW_OPCODE_BREAK && 2825 inst0->bits3.if_else.jump_count == 0) { 2826 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 2827 } 2828 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 2829 inst0->bits3.if_else.jump_count == 0) { 2830 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 2831 } 2832 } 2833 } 2834 break; 2835 2836 case FS_OPCODE_RCP: 2837 case FS_OPCODE_RSQ: 2838 case FS_OPCODE_SQRT: 2839 case FS_OPCODE_EXP2: 2840 case FS_OPCODE_LOG2: 2841 case FS_OPCODE_POW: 2842 case FS_OPCODE_SIN: 2843 case FS_OPCODE_COS: 2844 generate_math(inst, dst, src); 2845 break; 2846 case FS_OPCODE_LINTERP: 2847 generate_linterp(inst, dst, src); 2848 break; 2849 case 
FS_OPCODE_TEX: 2850 case FS_OPCODE_TXB: 2851 case FS_OPCODE_TXL: 2852 generate_tex(inst, dst, src[0]); 2853 break; 2854 case FS_OPCODE_DISCARD: 2855 generate_discard(inst, dst /* src0 == dst */); 2856 break; 2857 case FS_OPCODE_DDX: 2858 generate_ddx(inst, dst, src[0]); 2859 break; 2860 case FS_OPCODE_DDY: 2861 generate_ddy(inst, dst, src[0]); 2862 break; 2863 case FS_OPCODE_FB_WRITE: 2864 generate_fb_write(inst); 2865 break; 2866 default: 2867 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 2868 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 2869 brw_opcodes[inst->opcode].name); 2870 } else { 2871 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 2872 } 2873 this->fail = true; 2874 } 2875 2876 if (annotation_len < p->nr_insn) { 2877 annotation_len *= 2; 2878 if (annotation_len < 16) 2879 annotation_len = 16; 2880 2881 this->annotation_string = talloc_realloc(this->mem_ctx, 2882 annotation_string, 2883 const char *, 2884 annotation_len); 2885 this->annotation_ir = talloc_realloc(this->mem_ctx, 2886 annotation_ir, 2887 ir_instruction *, 2888 annotation_len); 2889 } 2890 2891 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 2892 this->annotation_string[i] = inst->annotation; 2893 this->annotation_ir[i] = inst->ir; 2894 } 2895 last_native_inst = p->nr_insn; 2896 } 2897} 2898 2899GLboolean 2900brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 2901{ 2902 struct brw_compile *p = &c->func; 2903 struct intel_context *intel = &brw->intel; 2904 GLcontext *ctx = &intel->ctx; 2905 struct brw_shader *shader = NULL; 2906 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 2907 2908 if (!prog) 2909 return GL_FALSE; 2910 2911 if (!using_new_fs) 2912 return GL_FALSE; 2913 2914 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 2915 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 2916 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 2917 break; 2918 } 2919 } 2920 if (!shader) 2921 return 
GL_FALSE; 2922 2923 /* We always use 8-wide mode, at least for now. For one, flow 2924 * control only works in 8-wide. Also, when we're fragment shader 2925 * bound, we're almost always under register pressure as well, so 2926 * 8-wide would save us from the performance cliff of spilling 2927 * regs. 2928 */ 2929 c->dispatch_width = 8; 2930 2931 if (INTEL_DEBUG & DEBUG_WM) { 2932 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 2933 _mesa_print_ir(shader->ir, NULL); 2934 printf("\n"); 2935 } 2936 2937 /* Now the main event: Visit the shader IR and generate our FS IR for it. 2938 */ 2939 fs_visitor v(c, shader); 2940 2941 if (0) { 2942 v.emit_dummy_fs(); 2943 } else { 2944 v.calculate_urb_setup(); 2945 if (intel->gen < 6) 2946 v.emit_interpolation_setup_gen4(); 2947 else 2948 v.emit_interpolation_setup_gen6(); 2949 2950 /* Generate FS IR for main(). (the visitor only descends into 2951 * functions called "main"). 2952 */ 2953 foreach_iter(exec_list_iterator, iter, *shader->ir) { 2954 ir_instruction *ir = (ir_instruction *)iter.get(); 2955 v.base_ir = ir; 2956 ir->accept(&v); 2957 } 2958 2959 v.emit_fb_writes(); 2960 v.assign_curb_setup(); 2961 v.assign_urb_setup(); 2962 2963 bool progress; 2964 do { 2965 progress = false; 2966 2967 v.calculate_live_intervals(); 2968 progress = v.propagate_constants() || progress; 2969 progress = v.dead_code_eliminate() || progress; 2970 } while (progress); 2971 2972 if (0) 2973 v.assign_regs_trivial(); 2974 else 2975 v.assign_regs(); 2976 } 2977 2978 if (!v.fail) 2979 v.generate_code(); 2980 2981 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. 
*/ 2982 2983 if (v.fail) 2984 return GL_FALSE; 2985 2986 if (INTEL_DEBUG & DEBUG_WM) { 2987 const char *last_annotation_string = NULL; 2988 ir_instruction *last_annotation_ir = NULL; 2989 2990 printf("Native code for fragment shader %d:\n", prog->Name); 2991 for (unsigned int i = 0; i < p->nr_insn; i++) { 2992 if (last_annotation_ir != v.annotation_ir[i]) { 2993 last_annotation_ir = v.annotation_ir[i]; 2994 if (last_annotation_ir) { 2995 printf(" "); 2996 last_annotation_ir->print(); 2997 printf("\n"); 2998 } 2999 } 3000 if (last_annotation_string != v.annotation_string[i]) { 3001 last_annotation_string = v.annotation_string[i]; 3002 if (last_annotation_string) 3003 printf(" %s\n", last_annotation_string); 3004 } 3005 brw_disasm(stdout, &p->store[i], intel->gen); 3006 } 3007 printf("\n"); 3008 } 3009 3010 c->prog_data.total_grf = v.grf_used; 3011 c->prog_data.total_scratch = 0; 3012 3013 return GL_TRUE; 3014} 3015