brw_fs.cpp revision b90c7d1713c5a52fd85cb9dacad5828ae2fdbf6c
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/register_allocate.h" 39#include "program/sampler.h" 40#include "program/hash_table.h" 41#include "brw_context.h" 42#include "brw_eu.h" 43#include "brw_wm.h" 44#include "talloc.h" 45} 46#include "../glsl/glsl_types.h" 47#include "../glsl/ir_optimization.h" 48#include "../glsl/ir_print_visitor.h" 49 50enum register_file { 51 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 52 GRF = BRW_GENERAL_REGISTER_FILE, 53 MRF = BRW_MESSAGE_REGISTER_FILE, 54 IMM = BRW_IMMEDIATE_VALUE, 55 FIXED_HW_REG, /* a struct brw_reg */ 56 UNIFORM, /* prog_data->params[hw_reg] */ 57 BAD_FILE 58}; 59 60enum fs_opcodes { 61 FS_OPCODE_FB_WRITE = 256, 62 FS_OPCODE_RCP, 63 FS_OPCODE_RSQ, 64 FS_OPCODE_SQRT, 65 FS_OPCODE_EXP2, 66 FS_OPCODE_LOG2, 67 FS_OPCODE_POW, 68 FS_OPCODE_SIN, 69 FS_OPCODE_COS, 70 FS_OPCODE_DDX, 71 FS_OPCODE_DDY, 72 FS_OPCODE_LINTERP, 73 FS_OPCODE_TEX, 74 FS_OPCODE_TXB, 75 FS_OPCODE_TXL, 76 FS_OPCODE_DISCARD, 77}; 78 79static int using_new_fs = -1; 80static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 81 82struct gl_shader * 83brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) 84{ 85 struct brw_shader *shader; 86 87 shader = talloc_zero(NULL, struct brw_shader); 88 if (shader) { 89 shader->base.Type = type; 90 shader->base.Name = name; 91 _mesa_init_shader(ctx, &shader->base); 92 } 93 94 return &shader->base; 95} 96 97struct gl_shader_program * 98brw_new_shader_program(GLcontext *ctx, GLuint name) 99{ 100 struct brw_shader_program *prog; 101 prog = talloc_zero(NULL, struct brw_shader_program); 102 if (prog) { 103 prog->base.Name = name; 104 _mesa_init_shader_program(ctx, &prog->base); 105 } 106 return &prog->base; 107} 108 109GLboolean 110brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) 111{ 112 if (!_mesa_ir_compile_shader(ctx, shader)) 113 return GL_FALSE; 114 115 return GL_TRUE; 116} 117 118GLboolean 119brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) 120{ 121 if (using_new_fs == -1) 122 using_new_fs = getenv("INTEL_NEW_FS") != NULL; 123 124 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { 125 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; 126 127 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) { 128 void *mem_ctx = talloc_new(NULL); 129 bool progress; 130 131 if (shader->ir) 132 talloc_free(shader->ir); 133 shader->ir = new(shader) exec_list; 134 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 135 136 do_mat_op_to_vec(shader->ir); 137 do_mod_to_fract(shader->ir); 138 do_div_to_mul_rcp(shader->ir); 139 do_sub_to_add_neg(shader->ir); 140 do_explog_to_explog2(shader->ir); 141 142 do { 143 progress = false; 144 145 brw_do_channel_expressions(shader->ir); 146 brw_do_vector_splitting(shader->ir); 147 148 progress = do_lower_jumps(shader->ir, true, true, 149 true, /* main return */ 150 false, /* continue */ 151 false /* loops */ 152 ) || progress; 153 154 progress = do_common_optimization(shader->ir, true, 32) || progress; 155 156 progress = lower_noise(shader->ir) || progress; 157 progress = 158 lower_variable_index_to_cond_assign(shader->ir, 159 GL_TRUE, /* input */ 160 GL_TRUE, /* output */ 161 GL_TRUE, /* temp */ 162 GL_TRUE /* uniform */ 163 ) || progress; 164 } while (progress); 165 166 validate_ir_tree(shader->ir); 167 168 reparent_ir(shader->ir, shader->ir); 169 talloc_free(mem_ctx); 170 } 171 } 172 173 if (!_mesa_ir_link_shader(ctx, prog)) 174 return GL_FALSE; 175 176 return GL_TRUE; 177} 178 179static int 180type_size(const struct glsl_type *type) 181{ 182 unsigned int size, i; 183 184 switch (type->base_type) { 185 case GLSL_TYPE_UINT: 186 case GLSL_TYPE_INT: 187 case GLSL_TYPE_FLOAT: 188 case GLSL_TYPE_BOOL: 189 return type->components(); 190 case GLSL_TYPE_ARRAY: 191 return type_size(type->fields.array) * type->length; 192 case GLSL_TYPE_STRUCT: 193 size = 0; 194 for (i = 0; i < type->length; i++) { 195 size += type_size(type->fields.structure[i].type); 196 } 197 return size; 198 case GLSL_TYPE_SAMPLER: 199 /* Samplers take up no register space, since they're baked in at 200 * link time. 201 */ 202 return 0; 203 default: 204 assert(!"not reached"); 205 return 0; 206 } 207} 208 209class fs_reg { 210public: 211 /* Callers of this talloc-based new need not call delete. It's 212 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 213 static void* operator new(size_t size, void *ctx) 214 { 215 void *node; 216 217 node = talloc_size(ctx, size); 218 assert(node != NULL); 219 220 return node; 221 } 222 223 void init() 224 { 225 this->reg = 0; 226 this->reg_offset = 0; 227 this->negate = 0; 228 this->abs = 0; 229 this->hw_reg = -1; 230 } 231 232 /** Generic unset register constructor. */ 233 fs_reg() 234 { 235 init(); 236 this->file = BAD_FILE; 237 } 238 239 /** Immediate value constructor. */ 240 fs_reg(float f) 241 { 242 init(); 243 this->file = IMM; 244 this->type = BRW_REGISTER_TYPE_F; 245 this->imm.f = f; 246 } 247 248 /** Immediate value constructor. */ 249 fs_reg(int32_t i) 250 { 251 init(); 252 this->file = IMM; 253 this->type = BRW_REGISTER_TYPE_D; 254 this->imm.i = i; 255 } 256 257 /** Immediate value constructor. */ 258 fs_reg(uint32_t u) 259 { 260 init(); 261 this->file = IMM; 262 this->type = BRW_REGISTER_TYPE_UD; 263 this->imm.u = u; 264 } 265 266 /** Fixed brw_reg Immediate value constructor. */ 267 fs_reg(struct brw_reg fixed_hw_reg) 268 { 269 init(); 270 this->file = FIXED_HW_REG; 271 this->fixed_hw_reg = fixed_hw_reg; 272 this->type = fixed_hw_reg.type; 273 } 274 275 fs_reg(enum register_file file, int hw_reg); 276 fs_reg(class fs_visitor *v, const struct glsl_type *type); 277 278 /** Register file: ARF, GRF, MRF, IMM. */ 279 enum register_file file; 280 /** virtual register number. 0 = fixed hw reg */ 281 int reg; 282 /** Offset within the virtual register. */ 283 int reg_offset; 284 /** HW register number. Generally unset until register allocation. */ 285 int hw_reg; 286 /** Register type. BRW_REGISTER_TYPE_* */ 287 int type; 288 bool negate; 289 bool abs; 290 struct brw_reg fixed_hw_reg; 291 292 /** Value for file == BRW_IMMMEDIATE_FILE */ 293 union { 294 int32_t i; 295 uint32_t u; 296 float f; 297 } imm; 298}; 299 300static const fs_reg reg_undef; 301static const fs_reg reg_null(ARF, BRW_ARF_NULL); 302 303class fs_inst : public exec_node { 304public: 305 /* Callers of this talloc-based new need not call delete. It's 306 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 307 static void* operator new(size_t size, void *ctx) 308 { 309 void *node; 310 311 node = talloc_zero_size(ctx, size); 312 assert(node != NULL); 313 314 return node; 315 } 316 317 void init() 318 { 319 this->opcode = BRW_OPCODE_NOP; 320 this->saturate = false; 321 this->conditional_mod = BRW_CONDITIONAL_NONE; 322 this->predicated = false; 323 this->sampler = 0; 324 this->target = 0; 325 this->eot = false; 326 this->shadow_compare = false; 327 } 328 329 fs_inst() 330 { 331 init(); 332 } 333 334 fs_inst(int opcode) 335 { 336 init(); 337 this->opcode = opcode; 338 } 339 340 fs_inst(int opcode, fs_reg dst, fs_reg src0) 341 { 342 init(); 343 this->opcode = opcode; 344 this->dst = dst; 345 this->src[0] = src0; 346 } 347 348 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) 349 { 350 init(); 351 this->opcode = opcode; 352 this->dst = dst; 353 this->src[0] = src0; 354 this->src[1] = src1; 355 } 356 357 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 358 { 359 init(); 360 this->opcode = opcode; 361 this->dst = dst; 362 this->src[0] = src0; 363 this->src[1] = src1; 364 this->src[2] = src2; 365 } 366 367 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 368 fs_reg dst; 369 fs_reg src[3]; 370 bool saturate; 371 bool predicated; 372 int conditional_mod; /**< BRW_CONDITIONAL_* */ 373 374 int mlen; /**< SEND message length */ 375 int sampler; 376 int target; /**< MRT target. */ 377 bool eot; 378 bool shadow_compare; 379 380 /** @{ 381 * Annotation for the generated IR. One of the two can be set. 382 */ 383 ir_instruction *ir; 384 const char *annotation; 385 /** @} */ 386}; 387 388class fs_visitor : public ir_visitor 389{ 390public: 391 392 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) 393 { 394 this->c = c; 395 this->p = &c->func; 396 this->brw = p->brw; 397 this->fp = brw->fragment_program; 398 this->intel = &brw->intel; 399 this->ctx = &intel->ctx; 400 this->mem_ctx = talloc_new(NULL); 401 this->shader = shader; 402 this->fail = false; 403 this->variable_ht = hash_table_ctor(0, 404 hash_table_pointer_hash, 405 hash_table_pointer_compare); 406 407 this->frag_color = NULL; 408 this->frag_data = NULL; 409 this->frag_depth = NULL; 410 this->first_non_payload_grf = 0; 411 412 this->current_annotation = NULL; 413 this->annotation_string = NULL; 414 this->annotation_ir = NULL; 415 this->base_ir = NULL; 416 417 this->virtual_grf_sizes = NULL; 418 this->virtual_grf_next = 1; 419 this->virtual_grf_array_size = 0; 420 this->virtual_grf_def = NULL; 421 this->virtual_grf_use = NULL; 422 } 423 ~fs_visitor() 424 { 425 talloc_free(this->mem_ctx); 426 hash_table_dtor(this->variable_ht); 427 } 428 429 fs_reg *variable_storage(ir_variable *var); 430 int virtual_grf_alloc(int size); 431 432 void visit(ir_variable *ir); 433 void visit(ir_assignment *ir); 434 void visit(ir_dereference_variable *ir); 435 void visit(ir_dereference_record *ir); 436 void visit(ir_dereference_array *ir); 437 void visit(ir_expression *ir); 438 void visit(ir_texture *ir); 439 void visit(ir_if *ir); 440 void visit(ir_constant *ir); 441 void visit(ir_swizzle *ir); 442 void visit(ir_return *ir); 443 void visit(ir_loop *ir); 444 void visit(ir_loop_jump *ir); 445 void visit(ir_discard *ir); 446 void visit(ir_call *ir); 447 void visit(ir_function *ir); 448 void visit(ir_function_signature *ir); 449 450 fs_inst *emit(fs_inst inst); 451 void assign_curb_setup(); 452 void assign_urb_setup(); 453 void assign_regs(); 454 void assign_regs_trivial(); 455 void calculate_live_intervals(); 456 bool virtual_grf_interferes(int a, int b); 457 void generate_code(); 458 void generate_fb_write(fs_inst *inst); 459 void generate_linterp(fs_inst *inst, struct brw_reg dst, 460 struct brw_reg *src); 461 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 462 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); 463 void generate_discard(fs_inst *inst, struct brw_reg temp); 464 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 465 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 466 467 void emit_dummy_fs(); 468 void emit_fragcoord_interpolation(ir_variable *ir); 469 void emit_general_interpolation(ir_variable *ir); 470 void emit_interpolation_setup(); 471 void emit_fb_writes(); 472 473 struct brw_reg interp_reg(int location, int channel); 474 int setup_uniform_values(int loc, const glsl_type *type); 475 void setup_builtin_uniform_values(ir_variable *ir); 476 477 struct brw_context *brw; 478 const struct gl_fragment_program *fp; 479 struct intel_context *intel; 480 GLcontext *ctx; 481 struct brw_wm_compile *c; 482 struct brw_compile *p; 483 struct brw_shader *shader; 484 void *mem_ctx; 485 exec_list instructions; 486 487 int *virtual_grf_sizes; 488 int virtual_grf_next; 489 int virtual_grf_array_size; 490 int *virtual_grf_def; 491 int *virtual_grf_use; 492 493 struct hash_table *variable_ht; 494 ir_variable *frag_color, *frag_data, *frag_depth; 495 int first_non_payload_grf; 496 497 /** @{ debug annotation info */ 498 const char *current_annotation; 499 ir_instruction *base_ir; 500 const char **annotation_string; 501 ir_instruction **annotation_ir; 502 /** @} */ 503 504 bool fail; 505 506 /* Result of last visit() method. */ 507 fs_reg result; 508 509 fs_reg pixel_x; 510 fs_reg pixel_y; 511 fs_reg wpos_w; 512 fs_reg pixel_w; 513 fs_reg delta_x; 514 fs_reg delta_y; 515 516 int grf_used; 517 518}; 519 520int 521fs_visitor::virtual_grf_alloc(int size) 522{ 523 if (virtual_grf_array_size <= virtual_grf_next) { 524 if (virtual_grf_array_size == 0) 525 virtual_grf_array_size = 16; 526 else 527 virtual_grf_array_size *= 2; 528 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, 529 int, virtual_grf_array_size); 530 531 /* This slot is always unused. */ 532 virtual_grf_sizes[0] = 0; 533 } 534 virtual_grf_sizes[virtual_grf_next] = size; 535 return virtual_grf_next++; 536} 537 538/** Fixed HW reg constructor. */ 539fs_reg::fs_reg(enum register_file file, int hw_reg) 540{ 541 init(); 542 this->file = file; 543 this->hw_reg = hw_reg; 544 this->type = BRW_REGISTER_TYPE_F; 545} 546 547int 548brw_type_for_base_type(const struct glsl_type *type) 549{ 550 switch (type->base_type) { 551 case GLSL_TYPE_FLOAT: 552 return BRW_REGISTER_TYPE_F; 553 case GLSL_TYPE_INT: 554 case GLSL_TYPE_BOOL: 555 return BRW_REGISTER_TYPE_D; 556 case GLSL_TYPE_UINT: 557 return BRW_REGISTER_TYPE_UD; 558 case GLSL_TYPE_ARRAY: 559 case GLSL_TYPE_STRUCT: 560 /* These should be overridden with the type of the member when 561 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 562 * way to trip up if we don't. 563 */ 564 return BRW_REGISTER_TYPE_UD; 565 default: 566 assert(!"not reached"); 567 return BRW_REGISTER_TYPE_F; 568 } 569} 570 571/** Automatic reg constructor. */ 572fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 573{ 574 init(); 575 576 this->file = GRF; 577 this->reg = v->virtual_grf_alloc(type_size(type)); 578 this->reg_offset = 0; 579 this->type = brw_type_for_base_type(type); 580} 581 582fs_reg * 583fs_visitor::variable_storage(ir_variable *var) 584{ 585 return (fs_reg *)hash_table_find(this->variable_ht, var); 586} 587 588/* Our support for uniforms is piggy-backed on the struct 589 * gl_fragment_program, because that's where the values actually 590 * get stored, rather than in some global gl_shader_program uniform 591 * store. 592 */ 593int 594fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 595{ 596 unsigned int offset = 0; 597 float *vec_values; 598 599 if (type->is_matrix()) { 600 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 601 type->vector_elements, 602 1); 603 604 for (unsigned int i = 0; i < type->matrix_columns; i++) { 605 offset += setup_uniform_values(loc + offset, column); 606 } 607 608 return offset; 609 } 610 611 switch (type->base_type) { 612 case GLSL_TYPE_FLOAT: 613 case GLSL_TYPE_UINT: 614 case GLSL_TYPE_INT: 615 case GLSL_TYPE_BOOL: 616 vec_values = fp->Base.Parameters->ParameterValues[loc]; 617 for (unsigned int i = 0; i < type->vector_elements; i++) { 618 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 619 } 620 return 1; 621 622 case GLSL_TYPE_STRUCT: 623 for (unsigned int i = 0; i < type->length; i++) { 624 offset += setup_uniform_values(loc + offset, 625 type->fields.structure[i].type); 626 } 627 return offset; 628 629 case GLSL_TYPE_ARRAY: 630 for (unsigned int i = 0; i < type->length; i++) { 631 offset += setup_uniform_values(loc + offset, type->fields.array); 632 } 633 return offset; 634 635 case GLSL_TYPE_SAMPLER: 636 /* The sampler takes up a slot, but we don't use any values from it. */ 637 return 1; 638 639 default: 640 assert(!"not reached"); 641 return 0; 642 } 643} 644 645 646/* Our support for builtin uniforms is even scarier than non-builtin. 647 * It sits on top of the PROG_STATE_VAR parameters that are 648 * automatically updated from GL context state. 649 */ 650void 651fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 652{ 653 const struct gl_builtin_uniform_desc *statevar = NULL; 654 655 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 656 statevar = &_mesa_builtin_uniform_desc[i]; 657 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 658 break; 659 } 660 661 if (!statevar->name) { 662 this->fail = true; 663 printf("Failed to find builtin uniform `%s'\n", ir->name); 664 return; 665 } 666 667 int array_count; 668 if (ir->type->is_array()) { 669 array_count = ir->type->length; 670 } else { 671 array_count = 1; 672 } 673 674 for (int a = 0; a < array_count; a++) { 675 for (unsigned int i = 0; i < statevar->num_elements; i++) { 676 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 677 int tokens[STATE_LENGTH]; 678 679 memcpy(tokens, element->tokens, sizeof(element->tokens)); 680 if (ir->type->is_array()) { 681 tokens[1] = a; 682 } 683 684 /* This state reference has already been setup by ir_to_mesa, 685 * but we'll get the same index back here. 686 */ 687 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 688 (gl_state_index *)tokens); 689 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 690 691 /* Add each of the unique swizzles of the element as a 692 * parameter. This'll end up matching the expected layout of 693 * the array/matrix/structure we're trying to fill in. 694 */ 695 int last_swiz = -1; 696 for (unsigned int i = 0; i < 4; i++) { 697 int this_swiz = GET_SWZ(element->swizzle, i); 698 if (this_swiz == last_swiz) 699 break; 700 last_swiz = this_swiz; 701 702 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 703 } 704 } 705 } 706} 707 708void 709fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 710{ 711 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 712 fs_reg wpos = *reg; 713 fs_reg neg_y = this->pixel_y; 714 neg_y.negate = true; 715 716 /* gl_FragCoord.x */ 717 if (ir->pixel_center_integer) { 718 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 719 } else { 720 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 721 } 722 wpos.reg_offset++; 723 724 /* gl_FragCoord.y */ 725 if (ir->origin_upper_left && ir->pixel_center_integer) { 726 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 727 } else { 728 fs_reg pixel_y = this->pixel_y; 729 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 730 731 if (!ir->origin_upper_left) { 732 pixel_y.negate = true; 733 offset += c->key.drawable_height - 1.0; 734 } 735 736 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 737 } 738 wpos.reg_offset++; 739 740 /* gl_FragCoord.z */ 741 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 742 interp_reg(FRAG_ATTRIB_WPOS, 2))); 743 wpos.reg_offset++; 744 745 /* gl_FragCoord.w: Already set up in emit_interpolation */ 746 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 747 748 hash_table_insert(this->variable_ht, reg, ir); 749} 750 751 752void 753fs_visitor::emit_general_interpolation(ir_variable *ir) 754{ 755 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 756 /* Interpolation is always in floating point regs. */ 757 reg->type = BRW_REGISTER_TYPE_F; 758 fs_reg attr = *reg; 759 760 unsigned int array_elements; 761 const glsl_type *type; 762 763 if (ir->type->is_array()) { 764 array_elements = ir->type->length; 765 if (array_elements == 0) { 766 this->fail = true; 767 } 768 type = ir->type->fields.array; 769 } else { 770 array_elements = 1; 771 type = ir->type; 772 } 773 774 int location = ir->location; 775 for (unsigned int i = 0; i < array_elements; i++) { 776 for (unsigned int j = 0; j < type->matrix_columns; j++) { 777 if (!(fp->Base.InputsRead & BITFIELD64_BIT(location))) { 778 /* If there's no incoming setup data for this slot, don't 779 * emit interpolation for it (since it's not used, and 780 * we'd fall over later trying to find the setup data. 781 */ 782 attr.reg_offset += type->vector_elements; 783 continue; 784 } 785 786 for (unsigned int c = 0; c < type->vector_elements; c++) { 787 struct brw_reg interp = interp_reg(location, c); 788 emit(fs_inst(FS_OPCODE_LINTERP, 789 attr, 790 this->delta_x, 791 this->delta_y, 792 fs_reg(interp))); 793 attr.reg_offset++; 794 } 795 attr.reg_offset -= type->vector_elements; 796 797 for (unsigned int c = 0; c < type->vector_elements; c++) { 798 emit(fs_inst(BRW_OPCODE_MUL, 799 attr, 800 attr, 801 this->pixel_w)); 802 attr.reg_offset++; 803 } 804 location++; 805 } 806 } 807 808 hash_table_insert(this->variable_ht, reg, ir); 809} 810 811void 812fs_visitor::visit(ir_variable *ir) 813{ 814 fs_reg *reg = NULL; 815 816 if (variable_storage(ir)) 817 return; 818 819 if (strcmp(ir->name, "gl_FragColor") == 0) { 820 this->frag_color = ir; 821 } else if (strcmp(ir->name, "gl_FragData") == 0) { 822 this->frag_data = ir; 823 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 824 this->frag_depth = ir; 825 } 826 827 if (ir->mode == ir_var_in) { 828 if (!strcmp(ir->name, "gl_FragCoord")) { 829 emit_fragcoord_interpolation(ir); 830 return; 831 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 832 reg = new(this->mem_ctx) fs_reg(this, ir->type); 833 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 834 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 835 * us front face 836 */ 837 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 838 *reg, 839 fs_reg(r1_6ud), 840 fs_reg(1u << 31))); 841 inst->conditional_mod = BRW_CONDITIONAL_L; 842 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 843 } else { 844 emit_general_interpolation(ir); 845 return; 846 } 847 } 848 849 if (ir->mode == ir_var_uniform) { 850 int param_index = c->prog_data.nr_params; 851 852 if (!strncmp(ir->name, "gl_", 3)) { 853 setup_builtin_uniform_values(ir); 854 } else { 855 setup_uniform_values(ir->location, ir->type); 856 } 857 858 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 859 } 860 861 if (!reg) 862 reg = new(this->mem_ctx) fs_reg(this, ir->type); 863 864 hash_table_insert(this->variable_ht, reg, ir); 865} 866 867void 868fs_visitor::visit(ir_dereference_variable *ir) 869{ 870 fs_reg *reg = variable_storage(ir->var); 871 this->result = *reg; 872} 873 874void 875fs_visitor::visit(ir_dereference_record *ir) 876{ 877 const glsl_type *struct_type = ir->record->type; 878 879 ir->record->accept(this); 880 881 unsigned int offset = 0; 882 for (unsigned int i = 0; i < struct_type->length; i++) { 883 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 884 break; 885 offset += type_size(struct_type->fields.structure[i].type); 886 } 887 this->result.reg_offset += offset; 888 this->result.type = brw_type_for_base_type(ir->type); 889} 890 891void 892fs_visitor::visit(ir_dereference_array *ir) 893{ 894 ir_constant *index; 895 int element_size; 896 897 ir->array->accept(this); 898 index = ir->array_index->as_constant(); 899 900 element_size = type_size(ir->type); 901 this->result.type = brw_type_for_base_type(ir->type); 902 903 if (index) { 904 assert(this->result.file == UNIFORM || 905 (this->result.file == GRF && 906 this->result.reg != 0)); 907 this->result.reg_offset += index->value.i[0] * element_size; 908 } else { 909 assert(!"FINISHME: non-constant array element"); 910 } 911} 912 913void 914fs_visitor::visit(ir_expression *ir) 915{ 916 unsigned int operand; 917 fs_reg op[2], temp; 918 fs_reg result; 919 fs_inst *inst; 920 921 for (operand = 0; operand < ir->get_num_operands(); operand++) { 922 ir->operands[operand]->accept(this); 923 if (this->result.file == BAD_FILE) { 924 ir_print_visitor v; 925 printf("Failed to get tree for expression operand:\n"); 926 ir->operands[operand]->accept(&v); 927 this->fail = true; 928 } 929 op[operand] = this->result; 930 931 /* Matrix expression operands should have been broken down to vector 932 * operations already. 933 */ 934 assert(!ir->operands[operand]->type->is_matrix()); 935 /* And then those vector operands should have been broken down to scalar. 936 */ 937 assert(!ir->operands[operand]->type->is_vector()); 938 } 939 940 /* Storage for our result. If our result goes into an assignment, it will 941 * just get copy-propagated out, so no worries. 942 */ 943 this->result = fs_reg(this, ir->type); 944 945 switch (ir->operation) { 946 case ir_unop_logic_not: 947 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 948 break; 949 case ir_unop_neg: 950 op[0].negate = !op[0].negate; 951 this->result = op[0]; 952 break; 953 case ir_unop_abs: 954 op[0].abs = true; 955 this->result = op[0]; 956 break; 957 case ir_unop_sign: 958 temp = fs_reg(this, ir->type); 959 960 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 961 962 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 963 inst->conditional_mod = BRW_CONDITIONAL_G; 964 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 965 inst->predicated = true; 966 967 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 968 inst->conditional_mod = BRW_CONDITIONAL_L; 969 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 970 inst->predicated = true; 971 972 break; 973 case ir_unop_rcp: 974 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0])); 975 break; 976 977 case ir_unop_exp2: 978 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0])); 979 break; 980 case ir_unop_log2: 981 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0])); 982 break; 983 case ir_unop_exp: 984 case ir_unop_log: 985 assert(!"not reached: should be handled by ir_explog_to_explog2"); 986 break; 987 case ir_unop_sin: 988 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0])); 989 break; 990 case ir_unop_cos: 991 emit(fs_inst(FS_OPCODE_COS, this->result, op[0])); 992 break; 993 994 case ir_unop_dFdx: 995 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 996 break; 997 case ir_unop_dFdy: 998 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 999 break; 1000 1001 case ir_binop_add: 1002 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 1003 break; 1004 case ir_binop_sub: 1005 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 1006 break; 1007 1008 case ir_binop_mul: 1009 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 1010 break; 1011 case ir_binop_div: 1012 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1013 break; 1014 case ir_binop_mod: 1015 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1016 break; 1017 1018 case ir_binop_less: 1019 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1020 inst->conditional_mod = BRW_CONDITIONAL_L; 1021 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1022 break; 1023 case ir_binop_greater: 1024 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1025 inst->conditional_mod = BRW_CONDITIONAL_G; 1026 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1027 break; 1028 case ir_binop_lequal: 1029 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1030 inst->conditional_mod = BRW_CONDITIONAL_LE; 1031 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1032 break; 1033 case ir_binop_gequal: 1034 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1035 inst->conditional_mod = BRW_CONDITIONAL_GE; 1036 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1037 break; 1038 case ir_binop_equal: 1039 case ir_binop_all_equal: /* same as nequal for scalars */ 1040 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1041 inst->conditional_mod = BRW_CONDITIONAL_Z; 1042 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1043 break; 1044 case ir_binop_nequal: 1045 case ir_binop_any_nequal: /* same as nequal for scalars */ 1046 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1047 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1048 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1049 break; 1050 1051 case ir_binop_logic_xor: 1052 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 1053 break; 1054 1055 case ir_binop_logic_or: 1056 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 1057 break; 1058 1059 case ir_binop_logic_and: 1060 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 1061 break; 1062 1063 case ir_binop_dot: 1064 case ir_binop_cross: 1065 case ir_unop_any: 1066 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 1067 break; 1068 1069 case ir_unop_noise: 1070 assert(!"not reached: should be handled by lower_noise"); 1071 break; 1072 1073 case ir_unop_sqrt: 1074 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0])); 1075 break; 1076 1077 case ir_unop_rsq: 1078 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0])); 1079 break; 1080 1081 case ir_unop_i2f: 1082 case ir_unop_b2f: 1083 case ir_unop_b2i: 1084 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 1085 break; 1086 case ir_unop_f2i: 1087 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 1088 break; 1089 case ir_unop_f2b: 1090 case ir_unop_i2b: 1091 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 1092 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1093 1094 case ir_unop_trunc: 1095 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1096 break; 1097 case ir_unop_ceil: 1098 op[0].negate = ~op[0].negate; 1099 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1100 this->result.negate = true; 1101 break; 1102 case ir_unop_floor: 1103 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1104 break; 1105 case ir_unop_fract: 1106 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 1107 break; 1108 1109 case ir_binop_min: 1110 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1111 inst->conditional_mod = BRW_CONDITIONAL_L; 1112 1113 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1114 inst->predicated = true; 1115 break; 1116 case ir_binop_max: 1117 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1118 inst->conditional_mod = BRW_CONDITIONAL_G; 1119 1120 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1121 inst->predicated = true; 1122 break; 1123 1124 case ir_binop_pow: 1125 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1])); 1126 break; 1127 1128 case ir_unop_bit_not: 1129 case ir_unop_u2f: 1130 case ir_binop_lshift: 1131 case ir_binop_rshift: 1132 case ir_binop_bit_and: 1133 case ir_binop_bit_xor: 1134 case ir_binop_bit_or: 1135 assert(!"GLSL 1.30 features unsupported"); 1136 break; 1137 } 1138} 1139 1140void 1141fs_visitor::visit(ir_assignment *ir) 1142{ 1143 struct fs_reg l, r; 1144 int i; 1145 int write_mask; 1146 fs_inst *inst; 1147 1148 /* FINISHME: arrays on the lhs */ 1149 ir->lhs->accept(this); 1150 l = this->result; 1151 1152 ir->rhs->accept(this); 1153 r = this->result; 1154 1155 /* FINISHME: This should really set to the correct maximal writemask for each 1156 * FINISHME: component written (in the loops below). This case can only 1157 * FINISHME: occur for matrices, arrays, and structures. 1158 */ 1159 if (ir->write_mask == 0) { 1160 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 1161 write_mask = WRITEMASK_XYZW; 1162 } else { 1163 assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar()); 1164 write_mask = ir->write_mask; 1165 } 1166 1167 assert(l.file != BAD_FILE); 1168 assert(r.file != BAD_FILE); 1169 1170 if (ir->condition) { 1171 /* Get the condition bool into the predicate. */ 1172 ir->condition->accept(this); 1173 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0))); 1174 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1175 } 1176 1177 for (i = 0; i < type_size(ir->lhs->type); i++) { 1178 if (i >= 4 || (write_mask & (1 << i))) { 1179 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1180 if (ir->condition) 1181 inst->predicated = true; 1182 r.reg_offset++; 1183 } 1184 l.reg_offset++; 1185 } 1186} 1187 1188void 1189fs_visitor::visit(ir_texture *ir) 1190{ 1191 int base_mrf = 2; 1192 fs_inst *inst = NULL; 1193 unsigned int mlen = 0; 1194 1195 ir->coordinate->accept(this); 1196 fs_reg coordinate = this->result; 1197 1198 if (ir->projector) { 1199 fs_reg inv_proj = fs_reg(this, glsl_type::float_type); 1200 1201 ir->projector->accept(this); 1202 emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result)); 1203 1204 fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type); 1205 for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1206 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj)); 1207 coordinate.reg_offset++; 1208 proj_coordinate.reg_offset++; 1209 } 1210 proj_coordinate.reg_offset = 0; 1211 1212 coordinate = proj_coordinate; 1213 } 1214 1215 for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { 1216 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate)); 1217 coordinate.reg_offset++; 1218 } 1219 1220 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ 1221 if (intel->gen < 5) 1222 mlen = 3; 1223 1224 if (ir->shadow_comparitor) { 1225 /* For shadow comparisons, we have to supply u,v,r. */ 1226 mlen = 3; 1227 1228 ir->shadow_comparitor->accept(this); 1229 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1230 mlen++; 1231 } 1232 1233 /* Do we ever want to handle writemasking on texture samples? Is it 1234 * performance relevant? 1235 */ 1236 fs_reg dst = fs_reg(this, glsl_type::vec4_type); 1237 1238 switch (ir->op) { 1239 case ir_tex: 1240 inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf))); 1241 break; 1242 case ir_txb: 1243 ir->lod_info.bias->accept(this); 1244 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1245 mlen++; 1246 1247 inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf))); 1248 break; 1249 case ir_txl: 1250 ir->lod_info.lod->accept(this); 1251 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1252 mlen++; 1253 1254 inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf))); 1255 break; 1256 case ir_txd: 1257 case ir_txf: 1258 assert(!"GLSL 1.30 features unsupported"); 1259 break; 1260 } 1261 1262 inst->sampler = 1263 _mesa_get_sampler_uniform_value(ir->sampler, 1264 ctx->Shader.CurrentProgram, 1265 &brw->fragment_program->Base); 1266 inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler]; 1267 1268 this->result = dst; 1269 1270 if (ir->shadow_comparitor) 1271 inst->shadow_compare = true; 1272 inst->mlen = mlen; 1273} 1274 1275void 1276fs_visitor::visit(ir_swizzle *ir) 1277{ 1278 ir->val->accept(this); 1279 fs_reg val = this->result; 1280 1281 fs_reg result = fs_reg(this, ir->type); 1282 this->result = result; 1283 1284 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1285 fs_reg channel = val; 1286 int swiz = 0; 1287 1288 switch (i) { 1289 case 0: 1290 swiz = ir->mask.x; 1291 break; 1292 case 1: 1293 swiz = ir->mask.y; 1294 break; 1295 case 2: 1296 swiz = ir->mask.z; 1297 break; 1298 case 3: 1299 swiz = ir->mask.w; 1300 break; 1301 } 1302 1303 channel.reg_offset += swiz; 1304 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 1305 result.reg_offset++; 1306 } 1307} 1308 1309void 1310fs_visitor::visit(ir_discard *ir) 1311{ 1312 fs_reg temp = fs_reg(this, glsl_type::uint_type); 1313 1314 assert(ir->condition == NULL); /* FINISHME */ 1315 1316 emit(fs_inst(FS_OPCODE_DISCARD, temp, temp)); 1317} 1318 1319void 1320fs_visitor::visit(ir_constant *ir) 1321{ 1322 fs_reg reg(this, ir->type); 1323 this->result = reg; 1324 1325 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1326 switch (ir->type->base_type) { 1327 case GLSL_TYPE_FLOAT: 1328 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); 1329 break; 1330 case GLSL_TYPE_UINT: 1331 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); 1332 break; 1333 case GLSL_TYPE_INT: 1334 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); 1335 break; 1336 case GLSL_TYPE_BOOL: 1337 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); 1338 break; 1339 default: 1340 assert(!"Non-float/uint/int/bool constant"); 1341 } 1342 reg.reg_offset++; 1343 } 1344} 1345 1346void 1347fs_visitor::visit(ir_if *ir) 1348{ 1349 fs_inst *inst; 1350 1351 /* Don't point the annotation at the if statement, because then it plus 1352 * the then and else blocks get printed. 1353 */ 1354 this->base_ir = ir->condition; 1355 1356 /* Generate the condition into the condition code. */ 1357 ir->condition->accept(this); 1358 inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result)); 1359 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1360 1361 inst = emit(fs_inst(BRW_OPCODE_IF)); 1362 inst->predicated = true; 1363 1364 foreach_iter(exec_list_iterator, iter, ir->then_instructions) { 1365 ir_instruction *ir = (ir_instruction *)iter.get(); 1366 this->base_ir = ir; 1367 1368 ir->accept(this); 1369 } 1370 1371 if (!ir->else_instructions.is_empty()) { 1372 emit(fs_inst(BRW_OPCODE_ELSE)); 1373 1374 foreach_iter(exec_list_iterator, iter, ir->else_instructions) { 1375 ir_instruction *ir = (ir_instruction *)iter.get(); 1376 this->base_ir = ir; 1377 1378 ir->accept(this); 1379 } 1380 } 1381 1382 emit(fs_inst(BRW_OPCODE_ENDIF)); 1383} 1384 1385void 1386fs_visitor::visit(ir_loop *ir) 1387{ 1388 fs_reg counter = reg_undef; 1389 1390 if (ir->counter) { 1391 this->base_ir = ir->counter; 1392 ir->counter->accept(this); 1393 counter = *(variable_storage(ir->counter)); 1394 1395 if (ir->from) { 1396 this->base_ir = ir->from; 1397 ir->from->accept(this); 1398 1399 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result)); 1400 } 1401 } 1402 1403 /* Start a safety counter. If the user messed up their loop 1404 * counting, we don't want to hang the GPU. 1405 */ 1406 fs_reg max_iter = fs_reg(this, glsl_type::int_type); 1407 emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000))); 1408 1409 emit(fs_inst(BRW_OPCODE_DO)); 1410 1411 if (ir->to) { 1412 this->base_ir = ir->to; 1413 ir->to->accept(this); 1414 1415 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, 1416 counter, this->result)); 1417 switch (ir->cmp) { 1418 case ir_binop_equal: 1419 inst->conditional_mod = BRW_CONDITIONAL_Z; 1420 break; 1421 case ir_binop_nequal: 1422 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1423 break; 1424 case ir_binop_gequal: 1425 inst->conditional_mod = BRW_CONDITIONAL_GE; 1426 break; 1427 case ir_binop_lequal: 1428 inst->conditional_mod = BRW_CONDITIONAL_LE; 1429 break; 1430 case ir_binop_greater: 1431 inst->conditional_mod = BRW_CONDITIONAL_G; 1432 break; 1433 case ir_binop_less: 1434 inst->conditional_mod = BRW_CONDITIONAL_L; 1435 break; 1436 default: 1437 assert(!"not reached: unknown loop condition"); 1438 this->fail = true; 1439 break; 1440 } 1441 1442 inst = emit(fs_inst(BRW_OPCODE_BREAK)); 1443 inst->predicated = true; 1444 } 1445 1446 foreach_iter(exec_list_iterator, iter, ir->body_instructions) { 1447 ir_instruction *ir = (ir_instruction *)iter.get(); 1448 fs_inst *inst; 1449 1450 this->base_ir = ir; 1451 ir->accept(this); 1452 1453 /* Check the maximum loop iters counter. */ 1454 inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1))); 1455 inst->conditional_mod = BRW_CONDITIONAL_Z; 1456 1457 inst = emit(fs_inst(BRW_OPCODE_BREAK)); 1458 inst->predicated = true; 1459 } 1460 1461 if (ir->increment) { 1462 this->base_ir = ir->increment; 1463 ir->increment->accept(this); 1464 emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result)); 1465 } 1466 1467 emit(fs_inst(BRW_OPCODE_WHILE)); 1468} 1469 1470void 1471fs_visitor::visit(ir_loop_jump *ir) 1472{ 1473 switch (ir->mode) { 1474 case ir_loop_jump::jump_break: 1475 emit(fs_inst(BRW_OPCODE_BREAK)); 1476 break; 1477 case ir_loop_jump::jump_continue: 1478 emit(fs_inst(BRW_OPCODE_CONTINUE)); 1479 break; 1480 } 1481} 1482 1483void 1484fs_visitor::visit(ir_call *ir) 1485{ 1486 assert(!"FINISHME"); 1487} 1488 1489void 1490fs_visitor::visit(ir_return *ir) 1491{ 1492 assert(!"FINISHME"); 1493} 1494 1495void 1496fs_visitor::visit(ir_function *ir) 1497{ 1498 /* Ignore function bodies other than main() -- we shouldn't see calls to 1499 * them since they should all be inlined before we get to ir_to_mesa. 1500 */ 1501 if (strcmp(ir->name, "main") == 0) { 1502 const ir_function_signature *sig; 1503 exec_list empty; 1504 1505 sig = ir->matching_signature(&empty); 1506 1507 assert(sig); 1508 1509 foreach_iter(exec_list_iterator, iter, sig->body) { 1510 ir_instruction *ir = (ir_instruction *)iter.get(); 1511 this->base_ir = ir; 1512 1513 ir->accept(this); 1514 } 1515 } 1516} 1517 1518void 1519fs_visitor::visit(ir_function_signature *ir) 1520{ 1521 assert(!"not reached"); 1522 (void)ir; 1523} 1524 1525fs_inst * 1526fs_visitor::emit(fs_inst inst) 1527{ 1528 fs_inst *list_inst = new(mem_ctx) fs_inst; 1529 *list_inst = inst; 1530 1531 list_inst->annotation = this->current_annotation; 1532 list_inst->ir = this->base_ir; 1533 1534 this->instructions.push_tail(list_inst); 1535 1536 return list_inst; 1537} 1538 1539/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1540void 1541fs_visitor::emit_dummy_fs() 1542{ 1543 /* Everyone's favorite color. */ 1544 emit(fs_inst(BRW_OPCODE_MOV, 1545 fs_reg(MRF, 2), 1546 fs_reg(1.0f))); 1547 emit(fs_inst(BRW_OPCODE_MOV, 1548 fs_reg(MRF, 3), 1549 fs_reg(0.0f))); 1550 emit(fs_inst(BRW_OPCODE_MOV, 1551 fs_reg(MRF, 4), 1552 fs_reg(1.0f))); 1553 emit(fs_inst(BRW_OPCODE_MOV, 1554 fs_reg(MRF, 5), 1555 fs_reg(0.0f))); 1556 1557 fs_inst *write; 1558 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1559 fs_reg(0), 1560 fs_reg(0))); 1561} 1562 1563/* The register location here is relative to the start of the URB 1564 * data. It will get adjusted to be a real location before 1565 * generate_code() time. 1566 */ 1567struct brw_reg 1568fs_visitor::interp_reg(int location, int channel) 1569{ 1570 int regnr = location * 2 + channel / 2; 1571 int stride = (channel & 1) * 4; 1572 1573 return brw_vec1_grf(regnr, stride); 1574} 1575 1576/** Emits the interpolation for the varying inputs. */ 1577void 1578fs_visitor::emit_interpolation_setup() 1579{ 1580 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1581 1582 this->current_annotation = "compute pixel centers"; 1583 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1584 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1585 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1586 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1587 emit(fs_inst(BRW_OPCODE_ADD, 1588 this->pixel_x, 1589 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1590 fs_reg(brw_imm_v(0x10101010)))); 1591 emit(fs_inst(BRW_OPCODE_ADD, 1592 this->pixel_y, 1593 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1594 fs_reg(brw_imm_v(0x11001100)))); 1595 1596 this->current_annotation = "compute pixel deltas from v0"; 1597 this->delta_x = fs_reg(this, glsl_type::float_type); 1598 this->delta_y = fs_reg(this, glsl_type::float_type); 1599 emit(fs_inst(BRW_OPCODE_ADD, 1600 this->delta_x, 1601 this->pixel_x, 1602 fs_reg(negate(brw_vec1_grf(1, 0))))); 1603 emit(fs_inst(BRW_OPCODE_ADD, 1604 this->delta_y, 1605 this->pixel_y, 1606 fs_reg(negate(brw_vec1_grf(1, 1))))); 1607 1608 this->current_annotation = "compute pos.w and 1/pos.w"; 1609 /* Compute wpos.w. It's always in our setup, since it's needed to 1610 * interpolate the other attributes. 1611 */ 1612 this->wpos_w = fs_reg(this, glsl_type::float_type); 1613 emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, 1614 interp_reg(FRAG_ATTRIB_WPOS, 3))); 1615 /* Compute the pixel 1/W value from wpos.w. */ 1616 this->pixel_w = fs_reg(this, glsl_type::float_type); 1617 emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w)); 1618 this->current_annotation = NULL; 1619} 1620 1621void 1622fs_visitor::emit_fb_writes() 1623{ 1624 this->current_annotation = "FB write header"; 1625 int nr = 0; 1626 1627 /* m0, m1 header */ 1628 nr += 2; 1629 1630 if (c->key.aa_dest_stencil_reg) { 1631 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1632 fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); 1633 } 1634 1635 /* Reserve space for color. It'll be filled in per MRT below. */ 1636 int color_mrf = nr; 1637 nr += 4; 1638 1639 if (c->key.source_depth_to_render_target) { 1640 if (c->key.computes_depth) { 1641 /* Hand over gl_FragDepth. */ 1642 assert(this->frag_depth); 1643 fs_reg depth = *(variable_storage(this->frag_depth)); 1644 1645 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 1646 } else { 1647 /* Pass through the payload depth. */ 1648 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1649 fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); 1650 } 1651 } 1652 1653 if (c->key.dest_depth_reg) { 1654 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1655 fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); 1656 } 1657 1658 fs_reg color = reg_undef; 1659 if (this->frag_color) 1660 color = *(variable_storage(this->frag_color)); 1661 else if (this->frag_data) 1662 color = *(variable_storage(this->frag_data)); 1663 1664 for (int target = 0; target < c->key.nr_color_regions; target++) { 1665 this->current_annotation = talloc_asprintf(this->mem_ctx, 1666 "FB write target %d", 1667 target); 1668 if (this->frag_color || this->frag_data) { 1669 for (int i = 0; i < 4; i++) { 1670 emit(fs_inst(BRW_OPCODE_MOV, 1671 fs_reg(MRF, color_mrf + i), 1672 color)); 1673 color.reg_offset++; 1674 } 1675 } 1676 1677 if (this->frag_color) 1678 color.reg_offset -= 4; 1679 1680 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1681 reg_undef, reg_undef)); 1682 inst->target = target; 1683 inst->mlen = nr; 1684 if (target == c->key.nr_color_regions - 1) 1685 inst->eot = true; 1686 } 1687 1688 if (c->key.nr_color_regions == 0) { 1689 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1690 reg_undef, reg_undef)); 1691 inst->mlen = nr; 1692 inst->eot = true; 1693 } 1694 1695 this->current_annotation = NULL; 1696} 1697 1698void 1699fs_visitor::generate_fb_write(fs_inst *inst) 1700{ 1701 GLboolean eot = inst->eot; 1702 1703 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 1704 * move, here's g1. 1705 */ 1706 brw_push_insn_state(p); 1707 brw_set_mask_control(p, BRW_MASK_DISABLE); 1708 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1709 brw_MOV(p, 1710 brw_message_reg(1), 1711 brw_vec8_grf(1, 0)); 1712 brw_pop_insn_state(p); 1713 1714 brw_fb_WRITE(p, 1715 8, /* dispatch_width */ 1716 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 1717 0, /* base MRF */ 1718 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 1719 inst->target, 1720 inst->mlen, 1721 0, 1722 eot); 1723} 1724 1725void 1726fs_visitor::generate_linterp(fs_inst *inst, 1727 struct brw_reg dst, struct brw_reg *src) 1728{ 1729 struct brw_reg delta_x = src[0]; 1730 struct brw_reg delta_y = src[1]; 1731 struct brw_reg interp = src[2]; 1732 1733 if (brw->has_pln && 1734 delta_y.nr == delta_x.nr + 1 && 1735 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { 1736 brw_PLN(p, dst, interp, delta_x); 1737 } else { 1738 brw_LINE(p, brw_null_reg(), interp, delta_x); 1739 brw_MAC(p, dst, suboffset(interp, 1), delta_y); 1740 } 1741} 1742 1743void 1744fs_visitor::generate_math(fs_inst *inst, 1745 struct brw_reg dst, struct brw_reg *src) 1746{ 1747 int op; 1748 1749 switch (inst->opcode) { 1750 case FS_OPCODE_RCP: 1751 op = BRW_MATH_FUNCTION_INV; 1752 break; 1753 case FS_OPCODE_RSQ: 1754 op = BRW_MATH_FUNCTION_RSQ; 1755 break; 1756 case FS_OPCODE_SQRT: 1757 op = BRW_MATH_FUNCTION_SQRT; 1758 break; 1759 case FS_OPCODE_EXP2: 1760 op = BRW_MATH_FUNCTION_EXP; 1761 break; 1762 case FS_OPCODE_LOG2: 1763 op = BRW_MATH_FUNCTION_LOG; 1764 break; 1765 case FS_OPCODE_POW: 1766 op = BRW_MATH_FUNCTION_POW; 1767 break; 1768 case FS_OPCODE_SIN: 1769 op = BRW_MATH_FUNCTION_SIN; 1770 break; 1771 case FS_OPCODE_COS: 1772 op = BRW_MATH_FUNCTION_COS; 1773 break; 1774 default: 1775 assert(!"not reached: unknown math function"); 1776 op = 0; 1777 break; 1778 } 1779 1780 if (inst->opcode == FS_OPCODE_POW) { 1781 brw_MOV(p, brw_message_reg(3), src[1]); 1782 } 1783 1784 brw_math(p, dst, 1785 op, 1786 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 1787 BRW_MATH_SATURATE_NONE, 1788 2, src[0], 1789 BRW_MATH_DATA_VECTOR, 1790 BRW_MATH_PRECISION_FULL); 1791} 1792 1793void 1794fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1795{ 1796 int msg_type = -1; 1797 int rlen = 4; 1798 1799 if (intel->gen == 5) { 1800 switch (inst->opcode) { 1801 case FS_OPCODE_TEX: 1802 if (inst->shadow_compare) { 1803 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; 1804 } else { 1805 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; 1806 } 1807 break; 1808 case FS_OPCODE_TXB: 1809 if (inst->shadow_compare) { 1810 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; 1811 } else { 1812 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; 1813 } 1814 break; 1815 } 1816 } else { 1817 switch (inst->opcode) { 1818 case FS_OPCODE_TEX: 1819 /* Note that G45 and older determines shadow compare and dispatch width 1820 * from message length for most messages. 1821 */ 1822 if (inst->shadow_compare) { 1823 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; 1824 } else { 1825 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; 1826 } 1827 case FS_OPCODE_TXB: 1828 if (inst->shadow_compare) { 1829 assert(!"FINISHME: shadow compare with bias."); 1830 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1831 } else { 1832 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1833 rlen = 8; 1834 } 1835 break; 1836 } 1837 } 1838 assert(msg_type != -1); 1839 1840 /* g0 header. */ 1841 src.nr--; 1842 1843 brw_SAMPLE(p, 1844 retype(dst, BRW_REGISTER_TYPE_UW), 1845 src.nr, 1846 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 1847 SURF_INDEX_TEXTURE(inst->sampler), 1848 inst->sampler, 1849 WRITEMASK_XYZW, 1850 msg_type, 1851 rlen, 1852 inst->mlen + 1, 1853 0, 1854 1, 1855 BRW_SAMPLER_SIMD_MODE_SIMD8); 1856} 1857 1858 1859/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 1860 * looking like: 1861 * 1862 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 1863 * 1864 * and we're trying to produce: 1865 * 1866 * DDX DDY 1867 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 1868 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 1869 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 1870 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 1871 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 1872 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 1873 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 1874 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 1875 * 1876 * and add another set of two more subspans if in 16-pixel dispatch mode. 1877 * 1878 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 1879 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 1880 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 1881 * between each other. We could probably do it like ddx and swizzle the right 1882 * order later, but bail for now and just produce 1883 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) 1884 */ 1885void 1886fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1887{ 1888 struct brw_reg src0 = brw_reg(src.file, src.nr, 1, 1889 BRW_REGISTER_TYPE_F, 1890 BRW_VERTICAL_STRIDE_2, 1891 BRW_WIDTH_2, 1892 BRW_HORIZONTAL_STRIDE_0, 1893 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1894 struct brw_reg src1 = brw_reg(src.file, src.nr, 0, 1895 BRW_REGISTER_TYPE_F, 1896 BRW_VERTICAL_STRIDE_2, 1897 BRW_WIDTH_2, 1898 BRW_HORIZONTAL_STRIDE_0, 1899 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1900 brw_ADD(p, dst, src0, negate(src1)); 1901} 1902 1903void 1904fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1905{ 1906 struct brw_reg src0 = brw_reg(src.file, src.nr, 0, 1907 BRW_REGISTER_TYPE_F, 1908 BRW_VERTICAL_STRIDE_4, 1909 BRW_WIDTH_4, 1910 BRW_HORIZONTAL_STRIDE_0, 1911 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1912 struct brw_reg src1 = brw_reg(src.file, src.nr, 2, 1913 BRW_REGISTER_TYPE_F, 1914 BRW_VERTICAL_STRIDE_4, 1915 BRW_WIDTH_4, 1916 BRW_HORIZONTAL_STRIDE_0, 1917 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1918 brw_ADD(p, dst, src0, negate(src1)); 1919} 1920 1921void 1922fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp) 1923{ 1924 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); 1925 temp = brw_uw1_reg(temp.file, temp.nr, 0); 1926 1927 brw_push_insn_state(p); 1928 brw_set_mask_control(p, BRW_MASK_DISABLE); 1929 brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */ 1930 brw_AND(p, g0, temp, g0); 1931 brw_pop_insn_state(p); 1932} 1933 1934void 1935fs_visitor::assign_curb_setup() 1936{ 1937 c->prog_data.first_curbe_grf = c->key.nr_payload_regs; 1938 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 1939 1940 if (intel->gen == 5 && (c->prog_data.first_curbe_grf + 1941 c->prog_data.curb_read_length) & 1) { 1942 /* Align the start of the interpolation coefficients so that we can use 1943 * the PLN instruction. 1944 */ 1945 c->prog_data.first_curbe_grf++; 1946 } 1947 1948 /* Map the offsets in the UNIFORM file to fixed HW regs. */ 1949 foreach_iter(exec_list_iterator, iter, this->instructions) { 1950 fs_inst *inst = (fs_inst *)iter.get(); 1951 1952 for (unsigned int i = 0; i < 3; i++) { 1953 if (inst->src[i].file == UNIFORM) { 1954 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 1955 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + 1956 constant_nr / 8, 1957 constant_nr % 8); 1958 1959 inst->src[i].file = FIXED_HW_REG; 1960 inst->src[i].fixed_hw_reg = brw_reg; 1961 } 1962 } 1963 } 1964} 1965 1966void 1967fs_visitor::assign_urb_setup() 1968{ 1969 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; 1970 int interp_reg_nr[FRAG_ATTRIB_MAX]; 1971 1972 c->prog_data.urb_read_length = 0; 1973 1974 /* Figure out where each of the incoming setup attributes lands. */ 1975 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 1976 interp_reg_nr[i] = -1; 1977 1978 if (i != FRAG_ATTRIB_WPOS && 1979 !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i))) 1980 continue; 1981 1982 /* Each attribute is 4 setup channels, each of which is half a reg. */ 1983 interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length; 1984 c->prog_data.urb_read_length += 2; 1985 } 1986 1987 /* Map the register numbers for FS_OPCODE_LINTERP so that it uses 1988 * the correct setup input. 1989 */ 1990 foreach_iter(exec_list_iterator, iter, this->instructions) { 1991 fs_inst *inst = (fs_inst *)iter.get(); 1992 1993 if (inst->opcode != FS_OPCODE_LINTERP) 1994 continue; 1995 1996 assert(inst->src[2].file == FIXED_HW_REG); 1997 1998 int location = inst->src[2].fixed_hw_reg.nr / 2; 1999 assert(interp_reg_nr[location] != -1); 2000 inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] + 2001 (inst->src[2].fixed_hw_reg.nr & 1)); 2002 } 2003 2004 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 2005} 2006 2007static void 2008assign_reg(int *reg_hw_locations, fs_reg *reg) 2009{ 2010 if (reg->file == GRF && reg->reg != 0) { 2011 reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset; 2012 reg->reg = 0; 2013 } 2014} 2015 2016void 2017fs_visitor::assign_regs_trivial() 2018{ 2019 int last_grf = 0; 2020 int hw_reg_mapping[this->virtual_grf_next]; 2021 int i; 2022 2023 hw_reg_mapping[0] = 0; 2024 hw_reg_mapping[1] = this->first_non_payload_grf; 2025 for (i = 2; i < this->virtual_grf_next; i++) { 2026 hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + 2027 this->virtual_grf_sizes[i - 1]); 2028 } 2029 last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1]; 2030 2031 foreach_iter(exec_list_iterator, iter, this->instructions) { 2032 fs_inst *inst = (fs_inst *)iter.get(); 2033 2034 assign_reg(hw_reg_mapping, &inst->dst); 2035 assign_reg(hw_reg_mapping, &inst->src[0]); 2036 assign_reg(hw_reg_mapping, &inst->src[1]); 2037 } 2038 2039 this->grf_used = last_grf + 1; 2040} 2041 2042void 2043fs_visitor::assign_regs() 2044{ 2045 int last_grf = 0; 2046 int hw_reg_mapping[this->virtual_grf_next + 1]; 2047 int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf; 2048 int class_sizes[base_reg_count]; 2049 int class_count = 0; 2050 2051 calculate_live_intervals(); 2052 2053 /* Set up the register classes. 2054 * 2055 * The base registers store a scalar value. For texture samples, 2056 * we get virtual GRFs composed of 4 contiguous hw register. For 2057 * structures and arrays, we store them as contiguous larger things 2058 * than that, though we should be able to do better most of the 2059 * time. 2060 */ 2061 class_sizes[class_count++] = 1; 2062 for (int r = 1; r < this->virtual_grf_next; r++) { 2063 int i; 2064 2065 for (i = 0; i < class_count; i++) { 2066 if (class_sizes[i] == this->virtual_grf_sizes[r]) 2067 break; 2068 } 2069 if (i == class_count) { 2070 class_sizes[class_count++] = this->virtual_grf_sizes[r]; 2071 } 2072 } 2073 2074 int ra_reg_count = 0; 2075 int class_base_reg[class_count]; 2076 int class_reg_count[class_count]; 2077 int classes[class_count]; 2078 2079 for (int i = 0; i < class_count; i++) { 2080 class_base_reg[i] = ra_reg_count; 2081 class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); 2082 ra_reg_count += class_reg_count[i]; 2083 } 2084 2085 struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); 2086 for (int i = 0; i < class_count; i++) { 2087 classes[i] = ra_alloc_reg_class(regs); 2088 2089 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { 2090 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); 2091 } 2092 2093 /* Add conflicts between our contiguous registers aliasing 2094 * base regs and other register classes' contiguous registers 2095 * that alias base regs, or the base regs themselves for classes[0]. 2096 */ 2097 for (int c = 0; c <= i; c++) { 2098 for (int i_r = 0; i_r < class_reg_count[i] - 1; i_r++) { 2099 for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); 2100 c_r <= MIN2(class_reg_count[c] - 1, i_r + class_sizes[i] - 1); 2101 c_r++) { 2102 2103 if (0) { 2104 printf("%d/%d conflicts %d/%d\n", 2105 class_sizes[i], i_r, 2106 class_sizes[c], c_r); 2107 } 2108 2109 ra_add_reg_conflict(regs, 2110 class_base_reg[i] + i_r, 2111 class_base_reg[c] + c_r); 2112 } 2113 } 2114 } 2115 } 2116 2117 ra_set_finalize(regs); 2118 2119 struct ra_graph *g = ra_alloc_interference_graph(regs, 2120 this->virtual_grf_next); 2121 /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 2122 * with nodes. 2123 */ 2124 ra_set_node_class(g, 0, classes[0]); 2125 2126 for (int i = 1; i < this->virtual_grf_next; i++) { 2127 for (int c = 0; c < class_count; c++) { 2128 if (class_sizes[c] == this->virtual_grf_sizes[i]) { 2129 ra_set_node_class(g, i, classes[c]); 2130 break; 2131 } 2132 } 2133 2134 for (int j = 1; j < i; j++) { 2135 if (virtual_grf_interferes(i, j)) { 2136 ra_add_node_interference(g, i, j); 2137 } 2138 } 2139 } 2140 2141 /* FINISHME: Handle spilling */ 2142 if (!ra_allocate_no_spills(g)) { 2143 fprintf(stderr, "Failed to allocate registers.\n"); 2144 this->fail = true; 2145 return; 2146 } 2147 2148 /* Get the chosen virtual registers for each node, and map virtual 2149 * regs in the register classes back down to real hardware reg 2150 * numbers. 2151 */ 2152 hw_reg_mapping[0] = 0; /* unused */ 2153 for (int i = 1; i < this->virtual_grf_next; i++) { 2154 int reg = ra_get_node_reg(g, i); 2155 int hw_reg = -1; 2156 2157 for (int c = 0; c < class_count; c++) { 2158 if (reg >= class_base_reg[c] && 2159 reg < class_base_reg[c] + class_reg_count[c] - 1) { 2160 hw_reg = reg - class_base_reg[c]; 2161 break; 2162 } 2163 } 2164 2165 assert(hw_reg != -1); 2166 hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg; 2167 last_grf = MAX2(last_grf, 2168 hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); 2169 } 2170 2171 foreach_iter(exec_list_iterator, iter, this->instructions) { 2172 fs_inst *inst = (fs_inst *)iter.get(); 2173 2174 assign_reg(hw_reg_mapping, &inst->dst); 2175 assign_reg(hw_reg_mapping, &inst->src[0]); 2176 assign_reg(hw_reg_mapping, &inst->src[1]); 2177 } 2178 2179 this->grf_used = last_grf + 1; 2180 2181 talloc_free(g); 2182 talloc_free(regs); 2183} 2184 2185void 2186fs_visitor::calculate_live_intervals() 2187{ 2188 int num_vars = this->virtual_grf_next; 2189 int *def = talloc_array(mem_ctx, int, num_vars); 2190 int *use = talloc_array(mem_ctx, int, num_vars); 2191 int loop_depth = 0; 2192 int loop_start = 0; 2193 2194 for (int i = 0; i < num_vars; i++) { 2195 def[i] = 1 << 30; 2196 use[i] = 0; 2197 } 2198 2199 int ip = 0; 2200 foreach_iter(exec_list_iterator, iter, this->instructions) { 2201 fs_inst *inst = (fs_inst *)iter.get(); 2202 2203 if (inst->opcode == BRW_OPCODE_DO) { 2204 if (loop_depth++ == 0) 2205 loop_start = ip; 2206 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2207 loop_depth--; 2208 2209 if (loop_depth == 0) { 2210 /* FINISHME: 2211 * 2212 * Patches up any vars marked for use within the loop as 2213 * live until the end. This is conservative, as there 2214 * will often be variables defined and used inside the 2215 * loop but dead at the end of the loop body. 2216 */ 2217 for (int i = 0; i < num_vars; i++) { 2218 if (use[i] == loop_start) { 2219 use[i] = ip; 2220 } 2221 } 2222 } 2223 } else { 2224 int eip = ip; 2225 2226 if (loop_depth) 2227 eip = loop_start; 2228 2229 for (unsigned int i = 0; i < 3; i++) { 2230 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2231 def[inst->src[i].reg] = MIN2(def[inst->src[i].reg], eip); 2232 use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip); 2233 } 2234 } 2235 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2236 def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip); 2237 use[inst->dst.reg] = MAX2(use[inst->dst.reg], eip); 2238 } 2239 } 2240 2241 ip++; 2242 } 2243 2244 this->virtual_grf_def = def; 2245 this->virtual_grf_use = use; 2246} 2247 2248bool 2249fs_visitor::virtual_grf_interferes(int a, int b) 2250{ 2251 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); 2252 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); 2253 2254 return start <= end; 2255} 2256 2257static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) 2258{ 2259 struct brw_reg brw_reg; 2260 2261 switch (reg->file) { 2262 case GRF: 2263 case ARF: 2264 case MRF: 2265 brw_reg = brw_vec8_reg(reg->file, 2266 reg->hw_reg, 0); 2267 brw_reg = retype(brw_reg, reg->type); 2268 break; 2269 case IMM: 2270 switch (reg->type) { 2271 case BRW_REGISTER_TYPE_F: 2272 brw_reg = brw_imm_f(reg->imm.f); 2273 break; 2274 case BRW_REGISTER_TYPE_D: 2275 brw_reg = brw_imm_d(reg->imm.i); 2276 break; 2277 case BRW_REGISTER_TYPE_UD: 2278 brw_reg = brw_imm_ud(reg->imm.u); 2279 break; 2280 default: 2281 assert(!"not reached"); 2282 break; 2283 } 2284 break; 2285 case FIXED_HW_REG: 2286 brw_reg = reg->fixed_hw_reg; 2287 break; 2288 case BAD_FILE: 2289 /* Probably unused. */ 2290 brw_reg = brw_null_reg(); 2291 break; 2292 case UNIFORM: 2293 assert(!"not reached"); 2294 brw_reg = brw_null_reg(); 2295 break; 2296 } 2297 if (reg->abs) 2298 brw_reg = brw_abs(brw_reg); 2299 if (reg->negate) 2300 brw_reg = negate(brw_reg); 2301 2302 return brw_reg; 2303} 2304 2305void 2306fs_visitor::generate_code() 2307{ 2308 unsigned int annotation_len = 0; 2309 int last_native_inst = 0; 2310 struct brw_instruction *if_stack[16], *loop_stack[16]; 2311 int if_stack_depth = 0, loop_stack_depth = 0; 2312 int if_depth_in_loop[16]; 2313 2314 if_depth_in_loop[loop_stack_depth] = 0; 2315 2316 memset(&if_stack, 0, sizeof(if_stack)); 2317 foreach_iter(exec_list_iterator, iter, this->instructions) { 2318 fs_inst *inst = (fs_inst *)iter.get(); 2319 struct brw_reg src[3], dst; 2320 2321 for (unsigned int i = 0; i < 3; i++) { 2322 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 2323 } 2324 dst = brw_reg_from_fs_reg(&inst->dst); 2325 2326 brw_set_conditionalmod(p, inst->conditional_mod); 2327 brw_set_predicate_control(p, inst->predicated); 2328 2329 switch (inst->opcode) { 2330 case BRW_OPCODE_MOV: 2331 brw_MOV(p, dst, src[0]); 2332 break; 2333 case BRW_OPCODE_ADD: 2334 brw_ADD(p, dst, src[0], src[1]); 2335 break; 2336 case BRW_OPCODE_MUL: 2337 brw_MUL(p, dst, src[0], src[1]); 2338 break; 2339 2340 case BRW_OPCODE_FRC: 2341 brw_FRC(p, dst, src[0]); 2342 break; 2343 case BRW_OPCODE_RNDD: 2344 brw_RNDD(p, dst, src[0]); 2345 break; 2346 case BRW_OPCODE_RNDZ: 2347 brw_RNDZ(p, dst, src[0]); 2348 break; 2349 2350 case BRW_OPCODE_AND: 2351 brw_AND(p, dst, src[0], src[1]); 2352 break; 2353 case BRW_OPCODE_OR: 2354 brw_OR(p, dst, src[0], src[1]); 2355 break; 2356 case BRW_OPCODE_XOR: 2357 brw_XOR(p, dst, src[0], src[1]); 2358 break; 2359 2360 case BRW_OPCODE_CMP: 2361 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 2362 break; 2363 case BRW_OPCODE_SEL: 2364 brw_SEL(p, dst, src[0], src[1]); 2365 break; 2366 2367 case BRW_OPCODE_IF: 2368 assert(if_stack_depth < 16); 2369 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 2370 if_depth_in_loop[loop_stack_depth]++; 2371 if_stack_depth++; 2372 break; 2373 case BRW_OPCODE_ELSE: 2374 if_stack[if_stack_depth - 1] = 2375 brw_ELSE(p, if_stack[if_stack_depth - 1]); 2376 break; 2377 case BRW_OPCODE_ENDIF: 2378 if_stack_depth--; 2379 brw_ENDIF(p , if_stack[if_stack_depth]); 2380 if_depth_in_loop[loop_stack_depth]--; 2381 break; 2382 2383 case BRW_OPCODE_DO: 2384 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 2385 if_depth_in_loop[loop_stack_depth] = 0; 2386 break; 2387 2388 case BRW_OPCODE_BREAK: 2389 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 2390 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2391 break; 2392 case BRW_OPCODE_CONTINUE: 2393 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 2394 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2395 break; 2396 2397 case BRW_OPCODE_WHILE: { 2398 struct brw_instruction *inst0, *inst1; 2399 GLuint br = 1; 2400 2401 if (intel->gen == 5) 2402 br = 2; 2403 2404 assert(loop_stack_depth > 0); 2405 loop_stack_depth--; 2406 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 2407 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 2408 while (inst0 > loop_stack[loop_stack_depth]) { 2409 inst0--; 2410 if (inst0->header.opcode == BRW_OPCODE_BREAK && 2411 inst0->bits3.if_else.jump_count == 0) { 2412 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 2413 } 2414 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 2415 inst0->bits3.if_else.jump_count == 0) { 2416 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 2417 } 2418 } 2419 } 2420 break; 2421 2422 case FS_OPCODE_RCP: 2423 case FS_OPCODE_RSQ: 2424 case FS_OPCODE_SQRT: 2425 case FS_OPCODE_EXP2: 2426 case FS_OPCODE_LOG2: 2427 case FS_OPCODE_POW: 2428 case FS_OPCODE_SIN: 2429 case FS_OPCODE_COS: 2430 generate_math(inst, dst, src); 2431 break; 2432 case FS_OPCODE_LINTERP: 2433 generate_linterp(inst, dst, src); 2434 break; 2435 case FS_OPCODE_TEX: 2436 case FS_OPCODE_TXB: 2437 case FS_OPCODE_TXL: 2438 generate_tex(inst, dst, src[0]); 2439 break; 2440 case FS_OPCODE_DISCARD: 2441 generate_discard(inst, dst /* src0 == dst */); 2442 break; 2443 case FS_OPCODE_DDX: 2444 generate_ddx(inst, dst, src[0]); 2445 break; 2446 case FS_OPCODE_DDY: 2447 generate_ddy(inst, dst, src[0]); 2448 break; 2449 case FS_OPCODE_FB_WRITE: 2450 generate_fb_write(inst); 2451 break; 2452 default: 2453 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 2454 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 2455 brw_opcodes[inst->opcode].name); 2456 } else { 2457 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 2458 } 2459 this->fail = true; 2460 } 2461 2462 if (annotation_len < p->nr_insn) { 2463 annotation_len *= 2; 2464 if (annotation_len < 16) 2465 annotation_len = 16; 2466 2467 this->annotation_string = talloc_realloc(this->mem_ctx, 2468 annotation_string, 2469 const char *, 2470 annotation_len); 2471 this->annotation_ir = talloc_realloc(this->mem_ctx, 2472 annotation_ir, 2473 ir_instruction *, 2474 annotation_len); 2475 } 2476 2477 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 2478 this->annotation_string[i] = inst->annotation; 2479 this->annotation_ir[i] = inst->ir; 2480 } 2481 last_native_inst = p->nr_insn; 2482 } 2483} 2484 2485GLboolean 2486brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 2487{ 2488 struct brw_compile *p = &c->func; 2489 struct intel_context *intel = &brw->intel; 2490 GLcontext *ctx = &intel->ctx; 2491 struct brw_shader *shader = NULL; 2492 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 2493 2494 if (!prog) 2495 return GL_FALSE; 2496 2497 if (!using_new_fs) 2498 return GL_FALSE; 2499 2500 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 2501 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 2502 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 2503 break; 2504 } 2505 } 2506 if (!shader) 2507 return GL_FALSE; 2508 2509 /* We always use 8-wide mode, at least for now. For one, flow 2510 * control only works in 8-wide. Also, when we're fragment shader 2511 * bound, we're almost always under register pressure as well, so 2512 * 8-wide would save us from the performance cliff of spilling 2513 * regs. 2514 */ 2515 c->dispatch_width = 8; 2516 2517 if (INTEL_DEBUG & DEBUG_WM) { 2518 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 2519 _mesa_print_ir(shader->ir, NULL); 2520 printf("\n"); 2521 } 2522 2523 /* Now the main event: Visit the shader IR and generate our FS IR for it. 2524 */ 2525 fs_visitor v(c, shader); 2526 2527 if (0) { 2528 v.emit_dummy_fs(); 2529 } else { 2530 v.emit_interpolation_setup(); 2531 2532 /* Generate FS IR for main(). (the visitor only descends into 2533 * functions called "main"). 2534 */ 2535 foreach_iter(exec_list_iterator, iter, *shader->ir) { 2536 ir_instruction *ir = (ir_instruction *)iter.get(); 2537 v.base_ir = ir; 2538 ir->accept(&v); 2539 } 2540 2541 v.emit_fb_writes(); 2542 v.assign_curb_setup(); 2543 v.assign_urb_setup(); 2544 if (0) 2545 v.assign_regs_trivial(); 2546 else 2547 v.assign_regs(); 2548 } 2549 2550 v.generate_code(); 2551 2552 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */ 2553 2554 if (v.fail) 2555 return GL_FALSE; 2556 2557 if (INTEL_DEBUG & DEBUG_WM) { 2558 const char *last_annotation_string = NULL; 2559 ir_instruction *last_annotation_ir = NULL; 2560 2561 printf("Native code for fragment shader %d:\n", prog->Name); 2562 for (unsigned int i = 0; i < p->nr_insn; i++) { 2563 if (last_annotation_ir != v.annotation_ir[i]) { 2564 last_annotation_ir = v.annotation_ir[i]; 2565 if (last_annotation_ir) { 2566 printf(" "); 2567 last_annotation_ir->print(); 2568 printf("\n"); 2569 } 2570 } 2571 if (last_annotation_string != v.annotation_string[i]) { 2572 last_annotation_string = v.annotation_string[i]; 2573 if (last_annotation_string) 2574 printf(" %s\n", last_annotation_string); 2575 } 2576 brw_disasm(stdout, &p->store[i], intel->gen); 2577 } 2578 printf("\n"); 2579 } 2580 2581 c->prog_data.total_grf = v.grf_used; 2582 c->prog_data.total_scratch = 0; 2583 2584 return GL_TRUE; 2585} 2586