brw_fs.cpp revision 21148e1c0a3cf9cf25ded006a3d5ce2b12803ea9
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "main/uniforms.h" 35#include "program/prog_parameter.h" 36#include "program/prog_print.h" 37#include "program/prog_optimize.h" 38#include "program/sampler.h" 39#include "program/hash_table.h" 40#include "brw_context.h" 41#include "brw_eu.h" 42#include "brw_wm.h" 43#include "talloc.h" 44} 45#include "../glsl/glsl_types.h" 46#include "../glsl/ir_optimization.h" 47#include "../glsl/ir_print_visitor.h" 48 49enum register_file { 50 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 51 GRF = BRW_GENERAL_REGISTER_FILE, 52 MRF = BRW_MESSAGE_REGISTER_FILE, 53 IMM = BRW_IMMEDIATE_VALUE, 54 FIXED_HW_REG, /* a struct brw_reg */ 55 UNIFORM, /* prog_data->params[hw_reg] */ 56 BAD_FILE 57}; 58 59enum fs_opcodes { 60 FS_OPCODE_FB_WRITE = 256, 61 FS_OPCODE_RCP, 62 FS_OPCODE_RSQ, 63 FS_OPCODE_SQRT, 64 FS_OPCODE_EXP2, 65 FS_OPCODE_LOG2, 66 FS_OPCODE_POW, 67 FS_OPCODE_SIN, 68 FS_OPCODE_COS, 69 FS_OPCODE_DDX, 70 FS_OPCODE_DDY, 71 FS_OPCODE_LINTERP, 72 FS_OPCODE_TEX, 73 FS_OPCODE_TXB, 74 FS_OPCODE_TXL, 75 FS_OPCODE_DISCARD, 76}; 77 78static int using_new_fs = -1; 79static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 80 81struct gl_shader * 82brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) 83{ 84 struct brw_shader *shader; 85 86 shader = talloc_zero(NULL, struct brw_shader); 87 if (shader) { 88 shader->base.Type = type; 89 shader->base.Name = name; 90 _mesa_init_shader(ctx, &shader->base); 91 } 92 93 return &shader->base; 94} 95 96struct gl_shader_program * 97brw_new_shader_program(GLcontext *ctx, GLuint name) 98{ 99 struct brw_shader_program *prog; 100 prog = talloc_zero(NULL, struct brw_shader_program); 101 if (prog) { 102 prog->base.Name = name; 103 _mesa_init_shader_program(ctx, &prog->base); 104 } 105 return &prog->base; 106} 107 108GLboolean 109brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) 110{ 111 if (!_mesa_ir_compile_shader(ctx, shader)) 112 return GL_FALSE; 113 114 return GL_TRUE; 115} 116 117GLboolean 118brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) 119{ 120 if (using_new_fs == -1) 121 using_new_fs = getenv("INTEL_NEW_FS") != NULL; 122 123 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { 124 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; 125 126 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) { 127 void *mem_ctx = talloc_new(NULL); 128 bool progress; 129 130 if (shader->ir) 131 talloc_free(shader->ir); 132 shader->ir = new(shader) exec_list; 133 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 134 135 do_mat_op_to_vec(shader->ir); 136 do_mod_to_fract(shader->ir); 137 do_div_to_mul_rcp(shader->ir); 138 do_sub_to_add_neg(shader->ir); 139 do_explog_to_explog2(shader->ir); 140 141 do { 142 progress = false; 143 144 brw_do_channel_expressions(shader->ir); 145 brw_do_vector_splitting(shader->ir); 146 147 progress = do_lower_jumps(shader->ir, true, true, 148 true, /* main return */ 149 false, /* continue */ 150 false /* loops */ 151 ) || progress; 152 153 progress = do_common_optimization(shader->ir, true, 32) || progress; 154 155 progress = lower_noise(shader->ir) || progress; 156 progress = 157 lower_variable_index_to_cond_assign(shader->ir, 158 GL_TRUE, /* input */ 159 GL_TRUE, /* output */ 160 GL_TRUE, /* temp */ 161 GL_TRUE /* uniform */ 162 ) || progress; 163 } while (progress); 164 165 validate_ir_tree(shader->ir); 166 167 reparent_ir(shader->ir, shader->ir); 168 talloc_free(mem_ctx); 169 } 170 } 171 172 if (!_mesa_ir_link_shader(ctx, prog)) 173 return GL_FALSE; 174 175 return GL_TRUE; 176} 177 178static int 179type_size(const struct glsl_type *type) 180{ 181 unsigned int size, i; 182 183 switch (type->base_type) { 184 case GLSL_TYPE_UINT: 185 case GLSL_TYPE_INT: 186 case GLSL_TYPE_FLOAT: 187 case GLSL_TYPE_BOOL: 188 return type->components(); 189 case GLSL_TYPE_ARRAY: 190 return type_size(type->fields.array) * type->length; 191 case GLSL_TYPE_STRUCT: 192 size = 0; 193 for (i = 0; i < type->length; i++) { 194 size += type_size(type->fields.structure[i].type); 195 } 196 return size; 197 case GLSL_TYPE_SAMPLER: 198 /* Samplers take up no register space, since they're baked in at 199 * link time. 200 */ 201 return 0; 202 default: 203 assert(!"not reached"); 204 return 0; 205 } 206} 207 208class fs_reg { 209public: 210 /* Callers of this talloc-based new need not call delete. It's 211 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 212 static void* operator new(size_t size, void *ctx) 213 { 214 void *node; 215 216 node = talloc_size(ctx, size); 217 assert(node != NULL); 218 219 return node; 220 } 221 222 void init() 223 { 224 this->reg = 0; 225 this->reg_offset = 0; 226 this->negate = 0; 227 this->abs = 0; 228 this->hw_reg = -1; 229 } 230 231 /** Generic unset register constructor. */ 232 fs_reg() 233 { 234 init(); 235 this->file = BAD_FILE; 236 } 237 238 /** Immediate value constructor. */ 239 fs_reg(float f) 240 { 241 init(); 242 this->file = IMM; 243 this->type = BRW_REGISTER_TYPE_F; 244 this->imm.f = f; 245 } 246 247 /** Immediate value constructor. */ 248 fs_reg(int32_t i) 249 { 250 init(); 251 this->file = IMM; 252 this->type = BRW_REGISTER_TYPE_D; 253 this->imm.i = i; 254 } 255 256 /** Immediate value constructor. */ 257 fs_reg(uint32_t u) 258 { 259 init(); 260 this->file = IMM; 261 this->type = BRW_REGISTER_TYPE_UD; 262 this->imm.u = u; 263 } 264 265 /** Fixed brw_reg Immediate value constructor. */ 266 fs_reg(struct brw_reg fixed_hw_reg) 267 { 268 init(); 269 this->file = FIXED_HW_REG; 270 this->fixed_hw_reg = fixed_hw_reg; 271 this->type = fixed_hw_reg.type; 272 } 273 274 fs_reg(enum register_file file, int hw_reg); 275 fs_reg(class fs_visitor *v, const struct glsl_type *type); 276 277 /** Register file: ARF, GRF, MRF, IMM. */ 278 enum register_file file; 279 /** virtual register number. 0 = fixed hw reg */ 280 int reg; 281 /** Offset within the virtual register. */ 282 int reg_offset; 283 /** HW register number. Generally unset until register allocation. */ 284 int hw_reg; 285 /** Register type. BRW_REGISTER_TYPE_* */ 286 int type; 287 bool negate; 288 bool abs; 289 struct brw_reg fixed_hw_reg; 290 291 /** Value for file == BRW_IMMMEDIATE_FILE */ 292 union { 293 int32_t i; 294 uint32_t u; 295 float f; 296 } imm; 297}; 298 299static const fs_reg reg_undef; 300static const fs_reg reg_null(ARF, BRW_ARF_NULL); 301 302class fs_inst : public exec_node { 303public: 304 /* Callers of this talloc-based new need not call delete. It's 305 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 306 static void* operator new(size_t size, void *ctx) 307 { 308 void *node; 309 310 node = talloc_zero_size(ctx, size); 311 assert(node != NULL); 312 313 return node; 314 } 315 316 void init() 317 { 318 this->opcode = BRW_OPCODE_NOP; 319 this->saturate = false; 320 this->conditional_mod = BRW_CONDITIONAL_NONE; 321 this->predicated = false; 322 this->sampler = 0; 323 this->target = 0; 324 this->eot = false; 325 this->shadow_compare = false; 326 } 327 328 fs_inst() 329 { 330 init(); 331 } 332 333 fs_inst(int opcode) 334 { 335 init(); 336 this->opcode = opcode; 337 } 338 339 fs_inst(int opcode, fs_reg dst, fs_reg src0) 340 { 341 init(); 342 this->opcode = opcode; 343 this->dst = dst; 344 this->src[0] = src0; 345 } 346 347 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) 348 { 349 init(); 350 this->opcode = opcode; 351 this->dst = dst; 352 this->src[0] = src0; 353 this->src[1] = src1; 354 } 355 356 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 357 { 358 init(); 359 this->opcode = opcode; 360 this->dst = dst; 361 this->src[0] = src0; 362 this->src[1] = src1; 363 this->src[2] = src2; 364 } 365 366 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 367 fs_reg dst; 368 fs_reg src[3]; 369 bool saturate; 370 bool predicated; 371 int conditional_mod; /**< BRW_CONDITIONAL_* */ 372 373 int mlen; /**< SEND message length */ 374 int sampler; 375 int target; /**< MRT target. */ 376 bool eot; 377 bool shadow_compare; 378 379 /** @{ 380 * Annotation for the generated IR. One of the two can be set. 381 */ 382 ir_instruction *ir; 383 const char *annotation; 384 /** @} */ 385}; 386 387class fs_visitor : public ir_visitor 388{ 389public: 390 391 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) 392 { 393 this->c = c; 394 this->p = &c->func; 395 this->brw = p->brw; 396 this->fp = brw->fragment_program; 397 this->intel = &brw->intel; 398 this->ctx = &intel->ctx; 399 this->mem_ctx = talloc_new(NULL); 400 this->shader = shader; 401 this->fail = false; 402 this->variable_ht = hash_table_ctor(0, 403 hash_table_pointer_hash, 404 hash_table_pointer_compare); 405 406 this->frag_color = NULL; 407 this->frag_data = NULL; 408 this->frag_depth = NULL; 409 this->first_non_payload_grf = 0; 410 411 this->current_annotation = NULL; 412 this->annotation_string = NULL; 413 this->annotation_ir = NULL; 414 this->base_ir = NULL; 415 416 this->virtual_grf_sizes = NULL; 417 this->virtual_grf_next = 1; 418 this->virtual_grf_array_size = 0; 419 } 420 ~fs_visitor() 421 { 422 talloc_free(this->mem_ctx); 423 hash_table_dtor(this->variable_ht); 424 } 425 426 fs_reg *variable_storage(ir_variable *var); 427 int virtual_grf_alloc(int size); 428 429 void visit(ir_variable *ir); 430 void visit(ir_assignment *ir); 431 void visit(ir_dereference_variable *ir); 432 void visit(ir_dereference_record *ir); 433 void visit(ir_dereference_array *ir); 434 void visit(ir_expression *ir); 435 void visit(ir_texture *ir); 436 void visit(ir_if *ir); 437 void visit(ir_constant *ir); 438 void visit(ir_swizzle *ir); 439 void visit(ir_return *ir); 440 void visit(ir_loop *ir); 441 void visit(ir_loop_jump *ir); 442 void visit(ir_discard *ir); 443 void visit(ir_call *ir); 444 void visit(ir_function *ir); 445 void visit(ir_function_signature *ir); 446 447 fs_inst *emit(fs_inst inst); 448 void assign_curb_setup(); 449 void assign_urb_setup(); 450 void assign_regs(); 451 void generate_code(); 452 void generate_fb_write(fs_inst *inst); 453 void generate_linterp(fs_inst *inst, struct brw_reg dst, 454 struct brw_reg *src); 455 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 456 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); 457 void generate_discard(fs_inst *inst, struct brw_reg temp); 458 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 459 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 460 461 void emit_dummy_fs(); 462 void emit_fragcoord_interpolation(ir_variable *ir); 463 void emit_general_interpolation(ir_variable *ir); 464 void emit_interpolation_setup(); 465 void emit_fb_writes(); 466 467 struct brw_reg interp_reg(int location, int channel); 468 int setup_uniform_values(int loc, const glsl_type *type); 469 void setup_builtin_uniform_values(ir_variable *ir); 470 471 struct brw_context *brw; 472 const struct gl_fragment_program *fp; 473 struct intel_context *intel; 474 GLcontext *ctx; 475 struct brw_wm_compile *c; 476 struct brw_compile *p; 477 struct brw_shader *shader; 478 void *mem_ctx; 479 exec_list instructions; 480 481 int *virtual_grf_sizes; 482 int virtual_grf_next; 483 int virtual_grf_array_size; 484 485 struct hash_table *variable_ht; 486 ir_variable *frag_color, *frag_data, *frag_depth; 487 int first_non_payload_grf; 488 489 /** @{ debug annotation info */ 490 const char *current_annotation; 491 ir_instruction *base_ir; 492 const char **annotation_string; 493 ir_instruction **annotation_ir; 494 /** @} */ 495 496 bool fail; 497 498 /* Result of last visit() method. */ 499 fs_reg result; 500 501 fs_reg pixel_x; 502 fs_reg pixel_y; 503 fs_reg wpos_w; 504 fs_reg pixel_w; 505 fs_reg delta_x; 506 fs_reg delta_y; 507 508 int grf_used; 509 510}; 511 512int 513fs_visitor::virtual_grf_alloc(int size) 514{ 515 if (virtual_grf_array_size <= virtual_grf_next) { 516 if (virtual_grf_array_size == 0) 517 virtual_grf_array_size = 16; 518 else 519 virtual_grf_array_size *= 2; 520 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, 521 int, virtual_grf_array_size); 522 523 /* This slot is always unused. */ 524 virtual_grf_sizes[0] = 0; 525 } 526 virtual_grf_sizes[virtual_grf_next] = size; 527 return virtual_grf_next++; 528} 529 530/** Fixed HW reg constructor. */ 531fs_reg::fs_reg(enum register_file file, int hw_reg) 532{ 533 init(); 534 this->file = file; 535 this->hw_reg = hw_reg; 536 this->type = BRW_REGISTER_TYPE_F; 537} 538 539int 540brw_type_for_base_type(const struct glsl_type *type) 541{ 542 switch (type->base_type) { 543 case GLSL_TYPE_FLOAT: 544 return BRW_REGISTER_TYPE_F; 545 case GLSL_TYPE_INT: 546 case GLSL_TYPE_BOOL: 547 return BRW_REGISTER_TYPE_D; 548 case GLSL_TYPE_UINT: 549 return BRW_REGISTER_TYPE_UD; 550 case GLSL_TYPE_ARRAY: 551 case GLSL_TYPE_STRUCT: 552 /* These should be overridden with the type of the member when 553 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 554 * way to trip up if we don't. 555 */ 556 return BRW_REGISTER_TYPE_UD; 557 default: 558 assert(!"not reached"); 559 return BRW_REGISTER_TYPE_F; 560 } 561} 562 563/** Automatic reg constructor. */ 564fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 565{ 566 init(); 567 568 this->file = GRF; 569 this->reg = v->virtual_grf_alloc(type_size(type)); 570 this->reg_offset = 0; 571 this->type = brw_type_for_base_type(type); 572} 573 574fs_reg * 575fs_visitor::variable_storage(ir_variable *var) 576{ 577 return (fs_reg *)hash_table_find(this->variable_ht, var); 578} 579 580/* Our support for uniforms is piggy-backed on the struct 581 * gl_fragment_program, because that's where the values actually 582 * get stored, rather than in some global gl_shader_program uniform 583 * store. 584 */ 585int 586fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 587{ 588 unsigned int offset = 0; 589 float *vec_values; 590 591 if (type->is_matrix()) { 592 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 593 type->vector_elements, 594 1); 595 596 for (unsigned int i = 0; i < type->matrix_columns; i++) { 597 offset += setup_uniform_values(loc + offset, column); 598 } 599 600 return offset; 601 } 602 603 switch (type->base_type) { 604 case GLSL_TYPE_FLOAT: 605 case GLSL_TYPE_UINT: 606 case GLSL_TYPE_INT: 607 case GLSL_TYPE_BOOL: 608 vec_values = fp->Base.Parameters->ParameterValues[loc]; 609 for (unsigned int i = 0; i < type->vector_elements; i++) { 610 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 611 } 612 return 1; 613 614 case GLSL_TYPE_STRUCT: 615 for (unsigned int i = 0; i < type->length; i++) { 616 offset += setup_uniform_values(loc + offset, 617 type->fields.structure[i].type); 618 } 619 return offset; 620 621 case GLSL_TYPE_ARRAY: 622 for (unsigned int i = 0; i < type->length; i++) { 623 offset += setup_uniform_values(loc + offset, type->fields.array); 624 } 625 return offset; 626 627 case GLSL_TYPE_SAMPLER: 628 /* The sampler takes up a slot, but we don't use any values from it. */ 629 return 1; 630 631 default: 632 assert(!"not reached"); 633 return 0; 634 } 635} 636 637 638/* Our support for builtin uniforms is even scarier than non-builtin. 639 * It sits on top of the PROG_STATE_VAR parameters that are 640 * automatically updated from GL context state. 641 */ 642void 643fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 644{ 645 const struct gl_builtin_uniform_desc *statevar = NULL; 646 647 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 648 statevar = &_mesa_builtin_uniform_desc[i]; 649 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 650 break; 651 } 652 653 if (!statevar->name) { 654 this->fail = true; 655 printf("Failed to find builtin uniform `%s'\n", ir->name); 656 return; 657 } 658 659 int array_count; 660 if (ir->type->is_array()) { 661 array_count = ir->type->length; 662 } else { 663 array_count = 1; 664 } 665 666 for (int a = 0; a < array_count; a++) { 667 for (unsigned int i = 0; i < statevar->num_elements; i++) { 668 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 669 int tokens[STATE_LENGTH]; 670 671 memcpy(tokens, element->tokens, sizeof(element->tokens)); 672 if (ir->type->is_array()) { 673 tokens[1] = a; 674 } 675 676 /* This state reference has already been setup by ir_to_mesa, 677 * but we'll get the same index back here. 678 */ 679 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 680 (gl_state_index *)tokens); 681 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 682 683 /* Add each of the unique swizzles of the element as a 684 * parameter. This'll end up matching the expected layout of 685 * the array/matrix/structure we're trying to fill in. 686 */ 687 int last_swiz = -1; 688 for (unsigned int i = 0; i < 4; i++) { 689 int this_swiz = GET_SWZ(element->swizzle, i); 690 if (this_swiz == last_swiz) 691 break; 692 last_swiz = this_swiz; 693 694 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 695 } 696 } 697 } 698} 699 700void 701fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 702{ 703 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 704 fs_reg wpos = *reg; 705 fs_reg neg_y = this->pixel_y; 706 neg_y.negate = true; 707 708 /* gl_FragCoord.x */ 709 if (ir->pixel_center_integer) { 710 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 711 } else { 712 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 713 } 714 wpos.reg_offset++; 715 716 /* gl_FragCoord.y */ 717 if (ir->origin_upper_left && ir->pixel_center_integer) { 718 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 719 } else { 720 fs_reg pixel_y = this->pixel_y; 721 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 722 723 if (!ir->origin_upper_left) { 724 pixel_y.negate = true; 725 offset += c->key.drawable_height - 1.0; 726 } 727 728 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 729 } 730 wpos.reg_offset++; 731 732 /* gl_FragCoord.z */ 733 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 734 interp_reg(FRAG_ATTRIB_WPOS, 2))); 735 wpos.reg_offset++; 736 737 /* gl_FragCoord.w: Already set up in emit_interpolation */ 738 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 739 740 hash_table_insert(this->variable_ht, reg, ir); 741} 742 743 744void 745fs_visitor::emit_general_interpolation(ir_variable *ir) 746{ 747 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 748 /* Interpolation is always in floating point regs. */ 749 reg->type = BRW_REGISTER_TYPE_F; 750 fs_reg attr = *reg; 751 752 unsigned int array_elements; 753 const glsl_type *type; 754 755 if (ir->type->is_array()) { 756 array_elements = ir->type->length; 757 if (array_elements == 0) { 758 this->fail = true; 759 } 760 type = ir->type->fields.array; 761 } else { 762 array_elements = 1; 763 type = ir->type; 764 } 765 766 int location = ir->location; 767 for (unsigned int i = 0; i < array_elements; i++) { 768 for (unsigned int j = 0; j < type->matrix_columns; j++) { 769 if (!(fp->Base.InputsRead & BITFIELD64_BIT(location))) { 770 /* If there's no incoming setup data for this slot, don't 771 * emit interpolation for it (since it's not used, and 772 * we'd fall over later trying to find the setup data. 773 */ 774 attr.reg_offset += type->vector_elements; 775 continue; 776 } 777 778 for (unsigned int c = 0; c < type->vector_elements; c++) { 779 struct brw_reg interp = interp_reg(location, c); 780 emit(fs_inst(FS_OPCODE_LINTERP, 781 attr, 782 this->delta_x, 783 this->delta_y, 784 fs_reg(interp))); 785 attr.reg_offset++; 786 } 787 attr.reg_offset -= type->vector_elements; 788 789 for (unsigned int c = 0; c < type->vector_elements; c++) { 790 emit(fs_inst(BRW_OPCODE_MUL, 791 attr, 792 attr, 793 this->pixel_w)); 794 attr.reg_offset++; 795 } 796 location++; 797 } 798 } 799 800 hash_table_insert(this->variable_ht, reg, ir); 801} 802 803void 804fs_visitor::visit(ir_variable *ir) 805{ 806 fs_reg *reg = NULL; 807 808 if (variable_storage(ir)) 809 return; 810 811 if (strcmp(ir->name, "gl_FragColor") == 0) { 812 this->frag_color = ir; 813 } else if (strcmp(ir->name, "gl_FragData") == 0) { 814 this->frag_data = ir; 815 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 816 this->frag_depth = ir; 817 } 818 819 if (ir->mode == ir_var_in) { 820 if (!strcmp(ir->name, "gl_FragCoord")) { 821 emit_fragcoord_interpolation(ir); 822 return; 823 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 824 reg = new(this->mem_ctx) fs_reg(this, ir->type); 825 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 826 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 827 * us front face 828 */ 829 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 830 *reg, 831 fs_reg(r1_6ud), 832 fs_reg(1u << 31))); 833 inst->conditional_mod = BRW_CONDITIONAL_L; 834 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 835 } else { 836 emit_general_interpolation(ir); 837 return; 838 } 839 } 840 841 if (ir->mode == ir_var_uniform) { 842 int param_index = c->prog_data.nr_params; 843 844 if (!strncmp(ir->name, "gl_", 3)) { 845 setup_builtin_uniform_values(ir); 846 } else { 847 setup_uniform_values(ir->location, ir->type); 848 } 849 850 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 851 } 852 853 if (!reg) 854 reg = new(this->mem_ctx) fs_reg(this, ir->type); 855 856 hash_table_insert(this->variable_ht, reg, ir); 857} 858 859void 860fs_visitor::visit(ir_dereference_variable *ir) 861{ 862 fs_reg *reg = variable_storage(ir->var); 863 this->result = *reg; 864} 865 866void 867fs_visitor::visit(ir_dereference_record *ir) 868{ 869 const glsl_type *struct_type = ir->record->type; 870 871 ir->record->accept(this); 872 873 unsigned int offset = 0; 874 for (unsigned int i = 0; i < struct_type->length; i++) { 875 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 876 break; 877 offset += type_size(struct_type->fields.structure[i].type); 878 } 879 this->result.reg_offset += offset; 880 this->result.type = brw_type_for_base_type(ir->type); 881} 882 883void 884fs_visitor::visit(ir_dereference_array *ir) 885{ 886 ir_constant *index; 887 int element_size; 888 889 ir->array->accept(this); 890 index = ir->array_index->as_constant(); 891 892 element_size = type_size(ir->type); 893 this->result.type = brw_type_for_base_type(ir->type); 894 895 if (index) { 896 assert(this->result.file == UNIFORM || 897 (this->result.file == GRF && 898 this->result.reg != 0)); 899 this->result.reg_offset += index->value.i[0] * element_size; 900 } else { 901 assert(!"FINISHME: non-constant array element"); 902 } 903} 904 905void 906fs_visitor::visit(ir_expression *ir) 907{ 908 unsigned int operand; 909 fs_reg op[2], temp; 910 fs_reg result; 911 fs_inst *inst; 912 913 for (operand = 0; operand < ir->get_num_operands(); operand++) { 914 ir->operands[operand]->accept(this); 915 if (this->result.file == BAD_FILE) { 916 ir_print_visitor v; 917 printf("Failed to get tree for expression operand:\n"); 918 ir->operands[operand]->accept(&v); 919 this->fail = true; 920 } 921 op[operand] = this->result; 922 923 /* Matrix expression operands should have been broken down to vector 924 * operations already. 925 */ 926 assert(!ir->operands[operand]->type->is_matrix()); 927 /* And then those vector operands should have been broken down to scalar. 928 */ 929 assert(!ir->operands[operand]->type->is_vector()); 930 } 931 932 /* Storage for our result. If our result goes into an assignment, it will 933 * just get copy-propagated out, so no worries. 934 */ 935 this->result = fs_reg(this, ir->type); 936 937 switch (ir->operation) { 938 case ir_unop_logic_not: 939 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 940 break; 941 case ir_unop_neg: 942 op[0].negate = !op[0].negate; 943 this->result = op[0]; 944 break; 945 case ir_unop_abs: 946 op[0].abs = true; 947 this->result = op[0]; 948 break; 949 case ir_unop_sign: 950 temp = fs_reg(this, ir->type); 951 952 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 953 954 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 955 inst->conditional_mod = BRW_CONDITIONAL_G; 956 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 957 inst->predicated = true; 958 959 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 960 inst->conditional_mod = BRW_CONDITIONAL_L; 961 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 962 inst->predicated = true; 963 964 break; 965 case ir_unop_rcp: 966 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0])); 967 break; 968 969 case ir_unop_exp2: 970 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0])); 971 break; 972 case ir_unop_log2: 973 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0])); 974 break; 975 case ir_unop_exp: 976 case ir_unop_log: 977 assert(!"not reached: should be handled by ir_explog_to_explog2"); 978 break; 979 case ir_unop_sin: 980 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0])); 981 break; 982 case ir_unop_cos: 983 emit(fs_inst(FS_OPCODE_COS, this->result, op[0])); 984 break; 985 986 case ir_unop_dFdx: 987 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 988 break; 989 case ir_unop_dFdy: 990 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 991 break; 992 993 case ir_binop_add: 994 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 995 break; 996 case ir_binop_sub: 997 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 998 break; 999 1000 case ir_binop_mul: 1001 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 1002 break; 1003 case ir_binop_div: 1004 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1005 break; 1006 case ir_binop_mod: 1007 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1008 break; 1009 1010 case ir_binop_less: 1011 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1012 inst->conditional_mod = BRW_CONDITIONAL_L; 1013 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1014 break; 1015 case ir_binop_greater: 1016 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1017 inst->conditional_mod = BRW_CONDITIONAL_G; 1018 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1019 break; 1020 case ir_binop_lequal: 1021 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1022 inst->conditional_mod = BRW_CONDITIONAL_LE; 1023 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1024 break; 1025 case ir_binop_gequal: 1026 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1027 inst->conditional_mod = BRW_CONDITIONAL_GE; 1028 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1029 break; 1030 case ir_binop_equal: 1031 case ir_binop_all_equal: /* same as nequal for scalars */ 1032 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1033 inst->conditional_mod = BRW_CONDITIONAL_Z; 1034 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1035 break; 1036 case ir_binop_nequal: 1037 case ir_binop_any_nequal: /* same as nequal for scalars */ 1038 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1039 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1040 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1041 break; 1042 1043 case ir_binop_logic_xor: 1044 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 1045 break; 1046 1047 case ir_binop_logic_or: 1048 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 1049 break; 1050 1051 case ir_binop_logic_and: 1052 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 1053 break; 1054 1055 case ir_binop_dot: 1056 case ir_binop_cross: 1057 case ir_unop_any: 1058 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 1059 break; 1060 1061 case ir_unop_noise: 1062 assert(!"not reached: should be handled by lower_noise"); 1063 break; 1064 1065 case ir_unop_sqrt: 1066 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0])); 1067 break; 1068 1069 case ir_unop_rsq: 1070 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0])); 1071 break; 1072 1073 case ir_unop_i2f: 1074 case ir_unop_b2f: 1075 case ir_unop_b2i: 1076 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 1077 break; 1078 case ir_unop_f2i: 1079 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 1080 break; 1081 case ir_unop_f2b: 1082 case ir_unop_i2b: 1083 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 1084 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1085 1086 case ir_unop_trunc: 1087 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1088 break; 1089 case ir_unop_ceil: 1090 op[0].negate = ~op[0].negate; 1091 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1092 this->result.negate = true; 1093 break; 1094 case ir_unop_floor: 1095 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1096 break; 1097 case ir_unop_fract: 1098 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 1099 break; 1100 1101 case ir_binop_min: 1102 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1103 inst->conditional_mod = BRW_CONDITIONAL_L; 1104 1105 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1106 inst->predicated = true; 1107 break; 1108 case ir_binop_max: 1109 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1110 inst->conditional_mod = BRW_CONDITIONAL_G; 1111 1112 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1113 inst->predicated = true; 1114 break; 1115 1116 case ir_binop_pow: 1117 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1])); 1118 break; 1119 1120 case ir_unop_bit_not: 1121 case ir_unop_u2f: 1122 case ir_binop_lshift: 1123 case ir_binop_rshift: 1124 case ir_binop_bit_and: 1125 case ir_binop_bit_xor: 1126 case ir_binop_bit_or: 1127 assert(!"GLSL 1.30 features unsupported"); 1128 break; 1129 } 1130} 1131 1132void 1133fs_visitor::visit(ir_assignment *ir) 1134{ 1135 struct fs_reg l, r; 1136 int i; 1137 int write_mask; 1138 fs_inst *inst; 1139 1140 /* FINISHME: arrays on the lhs */ 1141 ir->lhs->accept(this); 1142 l = this->result; 1143 1144 ir->rhs->accept(this); 1145 r = this->result; 1146 1147 /* FINISHME: This should really set to the correct maximal writemask for each 1148 * FINISHME: component written (in the loops below). This case can only 1149 * FINISHME: occur for matrices, arrays, and structures. 1150 */ 1151 if (ir->write_mask == 0) { 1152 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 1153 write_mask = WRITEMASK_XYZW; 1154 } else { 1155 assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar()); 1156 write_mask = ir->write_mask; 1157 } 1158 1159 assert(l.file != BAD_FILE); 1160 assert(r.file != BAD_FILE); 1161 1162 if (ir->condition) { 1163 /* Get the condition bool into the predicate. */ 1164 ir->condition->accept(this); 1165 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0))); 1166 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1167 } 1168 1169 for (i = 0; i < type_size(ir->lhs->type); i++) { 1170 if (i >= 4 || (write_mask & (1 << i))) { 1171 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1172 if (ir->condition) 1173 inst->predicated = true; 1174 r.reg_offset++; 1175 } 1176 l.reg_offset++; 1177 } 1178} 1179 1180void 1181fs_visitor::visit(ir_texture *ir) 1182{ 1183 int base_mrf = 2; 1184 fs_inst *inst = NULL; 1185 unsigned int mlen = 0; 1186 1187 ir->coordinate->accept(this); 1188 fs_reg coordinate = this->result; 1189 1190 if (ir->projector) { 1191 fs_reg inv_proj = fs_reg(this, glsl_type::float_type); 1192 1193 ir->projector->accept(this); 1194 emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result)); 1195 1196 fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type); 1197 for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1198 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj)); 1199 coordinate.reg_offset++; 1200 proj_coordinate.reg_offset++; 1201 } 1202 proj_coordinate.reg_offset = 0; 1203 1204 coordinate = proj_coordinate; 1205 } 1206 1207 for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { 1208 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate)); 1209 coordinate.reg_offset++; 1210 } 1211 1212 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ 1213 if (intel->gen < 5) 1214 mlen = 3; 1215 1216 if (ir->shadow_comparitor) { 1217 /* For shadow comparisons, we have to supply u,v,r. */ 1218 mlen = 3; 1219 1220 ir->shadow_comparitor->accept(this); 1221 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1222 mlen++; 1223 } 1224 1225 /* Do we ever want to handle writemasking on texture samples? Is it 1226 * performance relevant? 1227 */ 1228 fs_reg dst = fs_reg(this, glsl_type::vec4_type); 1229 1230 switch (ir->op) { 1231 case ir_tex: 1232 inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf))); 1233 break; 1234 case ir_txb: 1235 ir->lod_info.bias->accept(this); 1236 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1237 mlen++; 1238 1239 inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf))); 1240 break; 1241 case ir_txl: 1242 ir->lod_info.lod->accept(this); 1243 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1244 mlen++; 1245 1246 inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf))); 1247 break; 1248 case ir_txd: 1249 case ir_txf: 1250 assert(!"GLSL 1.30 features unsupported"); 1251 break; 1252 } 1253 1254 inst->sampler = 1255 _mesa_get_sampler_uniform_value(ir->sampler, 1256 ctx->Shader.CurrentProgram, 1257 &brw->fragment_program->Base); 1258 inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler]; 1259 1260 this->result = dst; 1261 1262 if (ir->shadow_comparitor) 1263 inst->shadow_compare = true; 1264 inst->mlen = mlen; 1265} 1266 1267void 1268fs_visitor::visit(ir_swizzle *ir) 1269{ 1270 ir->val->accept(this); 1271 fs_reg val = this->result; 1272 1273 fs_reg result = fs_reg(this, ir->type); 1274 this->result = result; 1275 1276 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1277 fs_reg channel = val; 1278 int swiz = 0; 1279 1280 switch (i) { 1281 case 0: 1282 swiz = ir->mask.x; 1283 break; 1284 case 1: 1285 swiz = ir->mask.y; 1286 break; 1287 case 2: 1288 swiz = ir->mask.z; 1289 break; 1290 case 3: 1291 swiz = ir->mask.w; 1292 break; 1293 } 1294 1295 channel.reg_offset += swiz; 1296 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 1297 result.reg_offset++; 1298 } 1299} 1300 1301void 1302fs_visitor::visit(ir_discard *ir) 1303{ 1304 fs_reg temp = fs_reg(this, glsl_type::uint_type); 1305 1306 assert(ir->condition == NULL); /* FINISHME */ 1307 1308 emit(fs_inst(FS_OPCODE_DISCARD, temp, temp)); 1309} 1310 1311void 1312fs_visitor::visit(ir_constant *ir) 1313{ 1314 fs_reg reg(this, ir->type); 1315 this->result = reg; 1316 1317 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1318 switch (ir->type->base_type) { 1319 case GLSL_TYPE_FLOAT: 1320 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); 1321 break; 1322 case GLSL_TYPE_UINT: 1323 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); 1324 break; 1325 case GLSL_TYPE_INT: 1326 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); 1327 break; 1328 case GLSL_TYPE_BOOL: 1329 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); 1330 break; 1331 default: 1332 assert(!"Non-float/uint/int/bool constant"); 1333 } 1334 reg.reg_offset++; 1335 } 1336} 1337 1338void 1339fs_visitor::visit(ir_if *ir) 1340{ 1341 fs_inst *inst; 1342 1343 /* Don't point the annotation at the if statement, because then it plus 1344 * the then and else blocks get printed. 1345 */ 1346 this->base_ir = ir->condition; 1347 1348 /* Generate the condition into the condition code. */ 1349 ir->condition->accept(this); 1350 inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result)); 1351 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1352 1353 inst = emit(fs_inst(BRW_OPCODE_IF)); 1354 inst->predicated = true; 1355 1356 foreach_iter(exec_list_iterator, iter, ir->then_instructions) { 1357 ir_instruction *ir = (ir_instruction *)iter.get(); 1358 this->base_ir = ir; 1359 1360 ir->accept(this); 1361 } 1362 1363 if (!ir->else_instructions.is_empty()) { 1364 emit(fs_inst(BRW_OPCODE_ELSE)); 1365 1366 foreach_iter(exec_list_iterator, iter, ir->else_instructions) { 1367 ir_instruction *ir = (ir_instruction *)iter.get(); 1368 this->base_ir = ir; 1369 1370 ir->accept(this); 1371 } 1372 } 1373 1374 emit(fs_inst(BRW_OPCODE_ENDIF)); 1375} 1376 1377void 1378fs_visitor::visit(ir_loop *ir) 1379{ 1380 fs_reg counter = reg_undef; 1381 1382 if (ir->counter) { 1383 this->base_ir = ir->counter; 1384 ir->counter->accept(this); 1385 counter = *(variable_storage(ir->counter)); 1386 1387 if (ir->from) { 1388 this->base_ir = ir->from; 1389 ir->from->accept(this); 1390 1391 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result)); 1392 } 1393 } 1394 1395 /* Start a safety counter. If the user messed up their loop 1396 * counting, we don't want to hang the GPU. 1397 */ 1398 fs_reg max_iter = fs_reg(this, glsl_type::int_type); 1399 emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000))); 1400 1401 emit(fs_inst(BRW_OPCODE_DO)); 1402 1403 if (ir->to) { 1404 this->base_ir = ir->to; 1405 ir->to->accept(this); 1406 1407 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, 1408 counter, this->result)); 1409 switch (ir->cmp) { 1410 case ir_binop_equal: 1411 inst->conditional_mod = BRW_CONDITIONAL_Z; 1412 break; 1413 case ir_binop_nequal: 1414 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1415 break; 1416 case ir_binop_gequal: 1417 inst->conditional_mod = BRW_CONDITIONAL_GE; 1418 break; 1419 case ir_binop_lequal: 1420 inst->conditional_mod = BRW_CONDITIONAL_LE; 1421 break; 1422 case ir_binop_greater: 1423 inst->conditional_mod = BRW_CONDITIONAL_G; 1424 break; 1425 case ir_binop_less: 1426 inst->conditional_mod = BRW_CONDITIONAL_L; 1427 break; 1428 default: 1429 assert(!"not reached: unknown loop condition"); 1430 this->fail = true; 1431 break; 1432 } 1433 1434 inst = emit(fs_inst(BRW_OPCODE_BREAK)); 1435 inst->predicated = true; 1436 } 1437 1438 foreach_iter(exec_list_iterator, iter, ir->body_instructions) { 1439 ir_instruction *ir = (ir_instruction *)iter.get(); 1440 fs_inst *inst; 1441 1442 this->base_ir = ir; 1443 ir->accept(this); 1444 1445 /* Check the maximum loop iters counter. */ 1446 inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1))); 1447 inst->conditional_mod = BRW_CONDITIONAL_Z; 1448 1449 inst = emit(fs_inst(BRW_OPCODE_BREAK)); 1450 inst->predicated = true; 1451 } 1452 1453 if (ir->increment) { 1454 this->base_ir = ir->increment; 1455 ir->increment->accept(this); 1456 emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result)); 1457 } 1458 1459 emit(fs_inst(BRW_OPCODE_WHILE)); 1460} 1461 1462void 1463fs_visitor::visit(ir_loop_jump *ir) 1464{ 1465 switch (ir->mode) { 1466 case ir_loop_jump::jump_break: 1467 emit(fs_inst(BRW_OPCODE_BREAK)); 1468 break; 1469 case ir_loop_jump::jump_continue: 1470 emit(fs_inst(BRW_OPCODE_CONTINUE)); 1471 break; 1472 } 1473} 1474 1475void 1476fs_visitor::visit(ir_call *ir) 1477{ 1478 assert(!"FINISHME"); 1479} 1480 1481void 1482fs_visitor::visit(ir_return *ir) 1483{ 1484 assert(!"FINISHME"); 1485} 1486 1487void 1488fs_visitor::visit(ir_function *ir) 1489{ 1490 /* Ignore function bodies other than main() -- we shouldn't see calls to 1491 * them since they should all be inlined before we get to ir_to_mesa. 1492 */ 1493 if (strcmp(ir->name, "main") == 0) { 1494 const ir_function_signature *sig; 1495 exec_list empty; 1496 1497 sig = ir->matching_signature(&empty); 1498 1499 assert(sig); 1500 1501 foreach_iter(exec_list_iterator, iter, sig->body) { 1502 ir_instruction *ir = (ir_instruction *)iter.get(); 1503 this->base_ir = ir; 1504 1505 ir->accept(this); 1506 } 1507 } 1508} 1509 1510void 1511fs_visitor::visit(ir_function_signature *ir) 1512{ 1513 assert(!"not reached"); 1514 (void)ir; 1515} 1516 1517fs_inst * 1518fs_visitor::emit(fs_inst inst) 1519{ 1520 fs_inst *list_inst = new(mem_ctx) fs_inst; 1521 *list_inst = inst; 1522 1523 list_inst->annotation = this->current_annotation; 1524 list_inst->ir = this->base_ir; 1525 1526 this->instructions.push_tail(list_inst); 1527 1528 return list_inst; 1529} 1530 1531/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1532void 1533fs_visitor::emit_dummy_fs() 1534{ 1535 /* Everyone's favorite color. */ 1536 emit(fs_inst(BRW_OPCODE_MOV, 1537 fs_reg(MRF, 2), 1538 fs_reg(1.0f))); 1539 emit(fs_inst(BRW_OPCODE_MOV, 1540 fs_reg(MRF, 3), 1541 fs_reg(0.0f))); 1542 emit(fs_inst(BRW_OPCODE_MOV, 1543 fs_reg(MRF, 4), 1544 fs_reg(1.0f))); 1545 emit(fs_inst(BRW_OPCODE_MOV, 1546 fs_reg(MRF, 5), 1547 fs_reg(0.0f))); 1548 1549 fs_inst *write; 1550 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1551 fs_reg(0), 1552 fs_reg(0))); 1553} 1554 1555/* The register location here is relative to the start of the URB 1556 * data. It will get adjusted to be a real location before 1557 * generate_code() time. 1558 */ 1559struct brw_reg 1560fs_visitor::interp_reg(int location, int channel) 1561{ 1562 int regnr = location * 2 + channel / 2; 1563 int stride = (channel & 1) * 4; 1564 1565 return brw_vec1_grf(regnr, stride); 1566} 1567 1568/** Emits the interpolation for the varying inputs. */ 1569void 1570fs_visitor::emit_interpolation_setup() 1571{ 1572 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1573 1574 this->current_annotation = "compute pixel centers"; 1575 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1576 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1577 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1578 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1579 emit(fs_inst(BRW_OPCODE_ADD, 1580 this->pixel_x, 1581 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1582 fs_reg(brw_imm_v(0x10101010)))); 1583 emit(fs_inst(BRW_OPCODE_ADD, 1584 this->pixel_y, 1585 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1586 fs_reg(brw_imm_v(0x11001100)))); 1587 1588 this->current_annotation = "compute pixel deltas from v0"; 1589 this->delta_x = fs_reg(this, glsl_type::float_type); 1590 this->delta_y = fs_reg(this, glsl_type::float_type); 1591 emit(fs_inst(BRW_OPCODE_ADD, 1592 this->delta_x, 1593 this->pixel_x, 1594 fs_reg(negate(brw_vec1_grf(1, 0))))); 1595 emit(fs_inst(BRW_OPCODE_ADD, 1596 this->delta_y, 1597 this->pixel_y, 1598 fs_reg(negate(brw_vec1_grf(1, 1))))); 1599 1600 this->current_annotation = "compute pos.w and 1/pos.w"; 1601 /* Compute wpos.w. It's always in our setup, since it's needed to 1602 * interpolate the other attributes. 1603 */ 1604 this->wpos_w = fs_reg(this, glsl_type::float_type); 1605 emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, 1606 interp_reg(FRAG_ATTRIB_WPOS, 3))); 1607 /* Compute the pixel 1/W value from wpos.w. */ 1608 this->pixel_w = fs_reg(this, glsl_type::float_type); 1609 emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w)); 1610 this->current_annotation = NULL; 1611} 1612 1613void 1614fs_visitor::emit_fb_writes() 1615{ 1616 this->current_annotation = "FB write header"; 1617 int nr = 0; 1618 1619 /* m0, m1 header */ 1620 nr += 2; 1621 1622 if (c->key.aa_dest_stencil_reg) { 1623 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1624 fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); 1625 } 1626 1627 /* Reserve space for color. It'll be filled in per MRT below. */ 1628 int color_mrf = nr; 1629 nr += 4; 1630 1631 if (c->key.source_depth_to_render_target) { 1632 if (c->key.computes_depth) { 1633 /* Hand over gl_FragDepth. */ 1634 assert(this->frag_depth); 1635 fs_reg depth = *(variable_storage(this->frag_depth)); 1636 1637 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 1638 } else { 1639 /* Pass through the payload depth. */ 1640 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1641 fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); 1642 } 1643 } 1644 1645 if (c->key.dest_depth_reg) { 1646 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1647 fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); 1648 } 1649 1650 fs_reg color = reg_undef; 1651 if (this->frag_color) 1652 color = *(variable_storage(this->frag_color)); 1653 else if (this->frag_data) 1654 color = *(variable_storage(this->frag_data)); 1655 1656 for (int target = 0; target < c->key.nr_color_regions; target++) { 1657 this->current_annotation = talloc_asprintf(this->mem_ctx, 1658 "FB write target %d", 1659 target); 1660 if (this->frag_color || this->frag_data) { 1661 for (int i = 0; i < 4; i++) { 1662 emit(fs_inst(BRW_OPCODE_MOV, 1663 fs_reg(MRF, color_mrf + i), 1664 color)); 1665 color.reg_offset++; 1666 } 1667 } 1668 1669 if (this->frag_color) 1670 color.reg_offset -= 4; 1671 1672 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1673 reg_undef, reg_undef)); 1674 inst->target = target; 1675 inst->mlen = nr; 1676 if (target == c->key.nr_color_regions - 1) 1677 inst->eot = true; 1678 } 1679 1680 if (c->key.nr_color_regions == 0) { 1681 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1682 reg_undef, reg_undef)); 1683 inst->mlen = nr; 1684 inst->eot = true; 1685 } 1686 1687 this->current_annotation = NULL; 1688} 1689 1690void 1691fs_visitor::generate_fb_write(fs_inst *inst) 1692{ 1693 GLboolean eot = inst->eot; 1694 1695 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 1696 * move, here's g1. 1697 */ 1698 brw_push_insn_state(p); 1699 brw_set_mask_control(p, BRW_MASK_DISABLE); 1700 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1701 brw_MOV(p, 1702 brw_message_reg(1), 1703 brw_vec8_grf(1, 0)); 1704 brw_pop_insn_state(p); 1705 1706 brw_fb_WRITE(p, 1707 8, /* dispatch_width */ 1708 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 1709 0, /* base MRF */ 1710 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 1711 inst->target, 1712 inst->mlen, 1713 0, 1714 eot); 1715} 1716 1717void 1718fs_visitor::generate_linterp(fs_inst *inst, 1719 struct brw_reg dst, struct brw_reg *src) 1720{ 1721 struct brw_reg delta_x = src[0]; 1722 struct brw_reg delta_y = src[1]; 1723 struct brw_reg interp = src[2]; 1724 1725 if (brw->has_pln && 1726 delta_y.nr == delta_x.nr + 1 && 1727 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { 1728 brw_PLN(p, dst, interp, delta_x); 1729 } else { 1730 brw_LINE(p, brw_null_reg(), interp, delta_x); 1731 brw_MAC(p, dst, suboffset(interp, 1), delta_y); 1732 } 1733} 1734 1735void 1736fs_visitor::generate_math(fs_inst *inst, 1737 struct brw_reg dst, struct brw_reg *src) 1738{ 1739 int op; 1740 1741 switch (inst->opcode) { 1742 case FS_OPCODE_RCP: 1743 op = BRW_MATH_FUNCTION_INV; 1744 break; 1745 case FS_OPCODE_RSQ: 1746 op = BRW_MATH_FUNCTION_RSQ; 1747 break; 1748 case FS_OPCODE_SQRT: 1749 op = BRW_MATH_FUNCTION_SQRT; 1750 break; 1751 case FS_OPCODE_EXP2: 1752 op = BRW_MATH_FUNCTION_EXP; 1753 break; 1754 case FS_OPCODE_LOG2: 1755 op = BRW_MATH_FUNCTION_LOG; 1756 break; 1757 case FS_OPCODE_POW: 1758 op = BRW_MATH_FUNCTION_POW; 1759 break; 1760 case FS_OPCODE_SIN: 1761 op = BRW_MATH_FUNCTION_SIN; 1762 break; 1763 case FS_OPCODE_COS: 1764 op = BRW_MATH_FUNCTION_COS; 1765 break; 1766 default: 1767 assert(!"not reached: unknown math function"); 1768 op = 0; 1769 break; 1770 } 1771 1772 if (inst->opcode == FS_OPCODE_POW) { 1773 brw_MOV(p, brw_message_reg(3), src[1]); 1774 } 1775 1776 brw_math(p, dst, 1777 op, 1778 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 1779 BRW_MATH_SATURATE_NONE, 1780 2, src[0], 1781 BRW_MATH_DATA_VECTOR, 1782 BRW_MATH_PRECISION_FULL); 1783} 1784 1785void 1786fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1787{ 1788 int msg_type = -1; 1789 int rlen = 4; 1790 1791 if (intel->gen == 5) { 1792 switch (inst->opcode) { 1793 case FS_OPCODE_TEX: 1794 if (inst->shadow_compare) { 1795 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; 1796 } else { 1797 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; 1798 } 1799 break; 1800 case FS_OPCODE_TXB: 1801 if (inst->shadow_compare) { 1802 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; 1803 } else { 1804 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; 1805 } 1806 break; 1807 } 1808 } else { 1809 switch (inst->opcode) { 1810 case FS_OPCODE_TEX: 1811 /* Note that G45 and older determines shadow compare and dispatch width 1812 * from message length for most messages. 1813 */ 1814 if (inst->shadow_compare) { 1815 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; 1816 } else { 1817 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; 1818 } 1819 case FS_OPCODE_TXB: 1820 if (inst->shadow_compare) { 1821 assert(!"FINISHME: shadow compare with bias."); 1822 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1823 } else { 1824 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1825 rlen = 8; 1826 } 1827 break; 1828 } 1829 } 1830 assert(msg_type != -1); 1831 1832 /* g0 header. */ 1833 src.nr--; 1834 1835 brw_SAMPLE(p, 1836 retype(dst, BRW_REGISTER_TYPE_UW), 1837 src.nr, 1838 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 1839 SURF_INDEX_TEXTURE(inst->sampler), 1840 inst->sampler, 1841 WRITEMASK_XYZW, 1842 msg_type, 1843 rlen, 1844 inst->mlen + 1, 1845 0, 1846 1, 1847 BRW_SAMPLER_SIMD_MODE_SIMD8); 1848} 1849 1850 1851/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 1852 * looking like: 1853 * 1854 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 1855 * 1856 * and we're trying to produce: 1857 * 1858 * DDX DDY 1859 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 1860 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 1861 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 1862 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 1863 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 1864 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 1865 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 1866 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 1867 * 1868 * and add another set of two more subspans if in 16-pixel dispatch mode. 1869 * 1870 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 1871 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 1872 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 1873 * between each other. We could probably do it like ddx and swizzle the right 1874 * order later, but bail for now and just produce 1875 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) 1876 */ 1877void 1878fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1879{ 1880 struct brw_reg src0 = brw_reg(src.file, src.nr, 1, 1881 BRW_REGISTER_TYPE_F, 1882 BRW_VERTICAL_STRIDE_2, 1883 BRW_WIDTH_2, 1884 BRW_HORIZONTAL_STRIDE_0, 1885 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1886 struct brw_reg src1 = brw_reg(src.file, src.nr, 0, 1887 BRW_REGISTER_TYPE_F, 1888 BRW_VERTICAL_STRIDE_2, 1889 BRW_WIDTH_2, 1890 BRW_HORIZONTAL_STRIDE_0, 1891 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1892 brw_ADD(p, dst, src0, negate(src1)); 1893} 1894 1895void 1896fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1897{ 1898 struct brw_reg src0 = brw_reg(src.file, src.nr, 0, 1899 BRW_REGISTER_TYPE_F, 1900 BRW_VERTICAL_STRIDE_4, 1901 BRW_WIDTH_4, 1902 BRW_HORIZONTAL_STRIDE_0, 1903 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1904 struct brw_reg src1 = brw_reg(src.file, src.nr, 2, 1905 BRW_REGISTER_TYPE_F, 1906 BRW_VERTICAL_STRIDE_4, 1907 BRW_WIDTH_4, 1908 BRW_HORIZONTAL_STRIDE_0, 1909 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); 1910 brw_ADD(p, dst, src0, negate(src1)); 1911} 1912 1913void 1914fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp) 1915{ 1916 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); 1917 temp = brw_uw1_reg(temp.file, temp.nr, 0); 1918 1919 brw_push_insn_state(p); 1920 brw_set_mask_control(p, BRW_MASK_DISABLE); 1921 brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */ 1922 brw_AND(p, g0, temp, g0); 1923 brw_pop_insn_state(p); 1924} 1925 1926void 1927fs_visitor::assign_curb_setup() 1928{ 1929 c->prog_data.first_curbe_grf = c->key.nr_payload_regs; 1930 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 1931 1932 if (intel->gen == 5 && (c->prog_data.first_curbe_grf + 1933 c->prog_data.curb_read_length) & 1) { 1934 /* Align the start of the interpolation coefficients so that we can use 1935 * the PLN instruction. 1936 */ 1937 c->prog_data.first_curbe_grf++; 1938 } 1939 1940 /* Map the offsets in the UNIFORM file to fixed HW regs. */ 1941 foreach_iter(exec_list_iterator, iter, this->instructions) { 1942 fs_inst *inst = (fs_inst *)iter.get(); 1943 1944 for (unsigned int i = 0; i < 3; i++) { 1945 if (inst->src[i].file == UNIFORM) { 1946 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 1947 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + 1948 constant_nr / 8, 1949 constant_nr % 8); 1950 1951 inst->src[i].file = FIXED_HW_REG; 1952 inst->src[i].fixed_hw_reg = brw_reg; 1953 } 1954 } 1955 } 1956} 1957 1958void 1959fs_visitor::assign_urb_setup() 1960{ 1961 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; 1962 int interp_reg_nr[FRAG_ATTRIB_MAX]; 1963 1964 c->prog_data.urb_read_length = 0; 1965 1966 /* Figure out where each of the incoming setup attributes lands. */ 1967 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 1968 interp_reg_nr[i] = -1; 1969 1970 if (i != FRAG_ATTRIB_WPOS && 1971 !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i))) 1972 continue; 1973 1974 /* Each attribute is 4 setup channels, each of which is half a reg. */ 1975 interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length; 1976 c->prog_data.urb_read_length += 2; 1977 } 1978 1979 /* Map the register numbers for FS_OPCODE_LINTERP so that it uses 1980 * the correct setup input. 1981 */ 1982 foreach_iter(exec_list_iterator, iter, this->instructions) { 1983 fs_inst *inst = (fs_inst *)iter.get(); 1984 1985 if (inst->opcode != FS_OPCODE_LINTERP) 1986 continue; 1987 1988 assert(inst->src[2].file == FIXED_HW_REG); 1989 1990 int location = inst->src[2].fixed_hw_reg.nr / 2; 1991 assert(interp_reg_nr[location] != -1); 1992 inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] + 1993 (inst->src[2].fixed_hw_reg.nr & 1)); 1994 } 1995 1996 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 1997} 1998 1999static void 2000trivial_assign_reg(int *reg_hw_locations, fs_reg *reg) 2001{ 2002 if (reg->file == GRF && reg->reg != 0) { 2003 reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset; 2004 reg->reg = 0; 2005 } 2006} 2007 2008void 2009fs_visitor::assign_regs() 2010{ 2011 int last_grf = 0; 2012 int hw_reg_mapping[this->virtual_grf_next]; 2013 int i; 2014 2015 hw_reg_mapping[0] = 0; 2016 hw_reg_mapping[1] = this->first_non_payload_grf; 2017 for (i = 2; i < this->virtual_grf_next; i++) { 2018 hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + 2019 this->virtual_grf_sizes[i - 1]); 2020 } 2021 last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1]; 2022 2023 /* FINISHME: trivial assignment of register numbers */ 2024 foreach_iter(exec_list_iterator, iter, this->instructions) { 2025 fs_inst *inst = (fs_inst *)iter.get(); 2026 2027 trivial_assign_reg(hw_reg_mapping, &inst->dst); 2028 trivial_assign_reg(hw_reg_mapping, &inst->src[0]); 2029 trivial_assign_reg(hw_reg_mapping, &inst->src[1]); 2030 } 2031 2032 this->grf_used = last_grf + 1; 2033} 2034 2035static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) 2036{ 2037 struct brw_reg brw_reg; 2038 2039 switch (reg->file) { 2040 case GRF: 2041 case ARF: 2042 case MRF: 2043 brw_reg = brw_vec8_reg(reg->file, 2044 reg->hw_reg, 0); 2045 brw_reg = retype(brw_reg, reg->type); 2046 break; 2047 case IMM: 2048 switch (reg->type) { 2049 case BRW_REGISTER_TYPE_F: 2050 brw_reg = brw_imm_f(reg->imm.f); 2051 break; 2052 case BRW_REGISTER_TYPE_D: 2053 brw_reg = brw_imm_d(reg->imm.i); 2054 break; 2055 case BRW_REGISTER_TYPE_UD: 2056 brw_reg = brw_imm_ud(reg->imm.u); 2057 break; 2058 default: 2059 assert(!"not reached"); 2060 break; 2061 } 2062 break; 2063 case FIXED_HW_REG: 2064 brw_reg = reg->fixed_hw_reg; 2065 break; 2066 case BAD_FILE: 2067 /* Probably unused. */ 2068 brw_reg = brw_null_reg(); 2069 break; 2070 case UNIFORM: 2071 assert(!"not reached"); 2072 brw_reg = brw_null_reg(); 2073 break; 2074 } 2075 if (reg->abs) 2076 brw_reg = brw_abs(brw_reg); 2077 if (reg->negate) 2078 brw_reg = negate(brw_reg); 2079 2080 return brw_reg; 2081} 2082 2083void 2084fs_visitor::generate_code() 2085{ 2086 unsigned int annotation_len = 0; 2087 int last_native_inst = 0; 2088 struct brw_instruction *if_stack[16], *loop_stack[16]; 2089 int if_stack_depth = 0, loop_stack_depth = 0; 2090 int if_depth_in_loop[16]; 2091 2092 if_depth_in_loop[loop_stack_depth] = 0; 2093 2094 memset(&if_stack, 0, sizeof(if_stack)); 2095 foreach_iter(exec_list_iterator, iter, this->instructions) { 2096 fs_inst *inst = (fs_inst *)iter.get(); 2097 struct brw_reg src[3], dst; 2098 2099 for (unsigned int i = 0; i < 3; i++) { 2100 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 2101 } 2102 dst = brw_reg_from_fs_reg(&inst->dst); 2103 2104 brw_set_conditionalmod(p, inst->conditional_mod); 2105 brw_set_predicate_control(p, inst->predicated); 2106 2107 switch (inst->opcode) { 2108 case BRW_OPCODE_MOV: 2109 brw_MOV(p, dst, src[0]); 2110 break; 2111 case BRW_OPCODE_ADD: 2112 brw_ADD(p, dst, src[0], src[1]); 2113 break; 2114 case BRW_OPCODE_MUL: 2115 brw_MUL(p, dst, src[0], src[1]); 2116 break; 2117 2118 case BRW_OPCODE_FRC: 2119 brw_FRC(p, dst, src[0]); 2120 break; 2121 case BRW_OPCODE_RNDD: 2122 brw_RNDD(p, dst, src[0]); 2123 break; 2124 case BRW_OPCODE_RNDZ: 2125 brw_RNDZ(p, dst, src[0]); 2126 break; 2127 2128 case BRW_OPCODE_AND: 2129 brw_AND(p, dst, src[0], src[1]); 2130 break; 2131 case BRW_OPCODE_OR: 2132 brw_OR(p, dst, src[0], src[1]); 2133 break; 2134 case BRW_OPCODE_XOR: 2135 brw_XOR(p, dst, src[0], src[1]); 2136 break; 2137 2138 case BRW_OPCODE_CMP: 2139 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 2140 break; 2141 case BRW_OPCODE_SEL: 2142 brw_SEL(p, dst, src[0], src[1]); 2143 break; 2144 2145 case BRW_OPCODE_IF: 2146 assert(if_stack_depth < 16); 2147 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 2148 if_depth_in_loop[loop_stack_depth]++; 2149 if_stack_depth++; 2150 break; 2151 case BRW_OPCODE_ELSE: 2152 if_stack[if_stack_depth - 1] = 2153 brw_ELSE(p, if_stack[if_stack_depth - 1]); 2154 break; 2155 case BRW_OPCODE_ENDIF: 2156 if_stack_depth--; 2157 brw_ENDIF(p , if_stack[if_stack_depth]); 2158 if_depth_in_loop[loop_stack_depth]--; 2159 break; 2160 2161 case BRW_OPCODE_DO: 2162 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 2163 if_depth_in_loop[loop_stack_depth] = 0; 2164 break; 2165 2166 case BRW_OPCODE_BREAK: 2167 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 2168 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2169 break; 2170 case BRW_OPCODE_CONTINUE: 2171 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 2172 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2173 break; 2174 2175 case BRW_OPCODE_WHILE: { 2176 struct brw_instruction *inst0, *inst1; 2177 GLuint br = 1; 2178 2179 if (intel->gen == 5) 2180 br = 2; 2181 2182 assert(loop_stack_depth > 0); 2183 loop_stack_depth--; 2184 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 2185 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 2186 while (inst0 > loop_stack[loop_stack_depth]) { 2187 inst0--; 2188 if (inst0->header.opcode == BRW_OPCODE_BREAK && 2189 inst0->bits3.if_else.jump_count == 0) { 2190 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 2191 } 2192 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 2193 inst0->bits3.if_else.jump_count == 0) { 2194 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 2195 } 2196 } 2197 } 2198 break; 2199 2200 case FS_OPCODE_RCP: 2201 case FS_OPCODE_RSQ: 2202 case FS_OPCODE_SQRT: 2203 case FS_OPCODE_EXP2: 2204 case FS_OPCODE_LOG2: 2205 case FS_OPCODE_POW: 2206 case FS_OPCODE_SIN: 2207 case FS_OPCODE_COS: 2208 generate_math(inst, dst, src); 2209 break; 2210 case FS_OPCODE_LINTERP: 2211 generate_linterp(inst, dst, src); 2212 break; 2213 case FS_OPCODE_TEX: 2214 case FS_OPCODE_TXB: 2215 case FS_OPCODE_TXL: 2216 generate_tex(inst, dst, src[0]); 2217 break; 2218 case FS_OPCODE_DISCARD: 2219 generate_discard(inst, dst /* src0 == dst */); 2220 break; 2221 case FS_OPCODE_DDX: 2222 generate_ddx(inst, dst, src[0]); 2223 break; 2224 case FS_OPCODE_DDY: 2225 generate_ddy(inst, dst, src[0]); 2226 break; 2227 case FS_OPCODE_FB_WRITE: 2228 generate_fb_write(inst); 2229 break; 2230 default: 2231 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 2232 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 2233 brw_opcodes[inst->opcode].name); 2234 } else { 2235 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 2236 } 2237 this->fail = true; 2238 } 2239 2240 if (annotation_len < p->nr_insn) { 2241 annotation_len *= 2; 2242 if (annotation_len < 16) 2243 annotation_len = 16; 2244 2245 this->annotation_string = talloc_realloc(this->mem_ctx, 2246 annotation_string, 2247 const char *, 2248 annotation_len); 2249 this->annotation_ir = talloc_realloc(this->mem_ctx, 2250 annotation_ir, 2251 ir_instruction *, 2252 annotation_len); 2253 } 2254 2255 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 2256 this->annotation_string[i] = inst->annotation; 2257 this->annotation_ir[i] = inst->ir; 2258 } 2259 last_native_inst = p->nr_insn; 2260 } 2261} 2262 2263GLboolean 2264brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 2265{ 2266 struct brw_compile *p = &c->func; 2267 struct intel_context *intel = &brw->intel; 2268 GLcontext *ctx = &intel->ctx; 2269 struct brw_shader *shader = NULL; 2270 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 2271 2272 if (!prog) 2273 return GL_FALSE; 2274 2275 if (!using_new_fs) 2276 return GL_FALSE; 2277 2278 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 2279 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 2280 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 2281 break; 2282 } 2283 } 2284 if (!shader) 2285 return GL_FALSE; 2286 2287 /* We always use 8-wide mode, at least for now. For one, flow 2288 * control only works in 8-wide. Also, when we're fragment shader 2289 * bound, we're almost always under register pressure as well, so 2290 * 8-wide would save us from the performance cliff of spilling 2291 * regs. 2292 */ 2293 c->dispatch_width = 8; 2294 2295 if (INTEL_DEBUG & DEBUG_WM) { 2296 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 2297 _mesa_print_ir(shader->ir, NULL); 2298 printf("\n"); 2299 } 2300 2301 /* Now the main event: Visit the shader IR and generate our FS IR for it. 2302 */ 2303 fs_visitor v(c, shader); 2304 2305 if (0) { 2306 v.emit_dummy_fs(); 2307 } else { 2308 v.emit_interpolation_setup(); 2309 2310 /* Generate FS IR for main(). (the visitor only descends into 2311 * functions called "main"). 2312 */ 2313 foreach_iter(exec_list_iterator, iter, *shader->ir) { 2314 ir_instruction *ir = (ir_instruction *)iter.get(); 2315 v.base_ir = ir; 2316 ir->accept(&v); 2317 } 2318 2319 v.emit_fb_writes(); 2320 v.assign_curb_setup(); 2321 v.assign_urb_setup(); 2322 v.assign_regs(); 2323 } 2324 2325 v.generate_code(); 2326 2327 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */ 2328 2329 if (v.fail) 2330 return GL_FALSE; 2331 2332 if (INTEL_DEBUG & DEBUG_WM) { 2333 const char *last_annotation_string = NULL; 2334 ir_instruction *last_annotation_ir = NULL; 2335 2336 printf("Native code for fragment shader %d:\n", prog->Name); 2337 for (unsigned int i = 0; i < p->nr_insn; i++) { 2338 if (last_annotation_ir != v.annotation_ir[i]) { 2339 last_annotation_ir = v.annotation_ir[i]; 2340 if (last_annotation_ir) { 2341 printf(" "); 2342 last_annotation_ir->print(); 2343 printf("\n"); 2344 } 2345 } 2346 if (last_annotation_string != v.annotation_string[i]) { 2347 last_annotation_string = v.annotation_string[i]; 2348 if (last_annotation_string) 2349 printf(" %s\n", last_annotation_string); 2350 } 2351 brw_disasm(stdout, &p->store[i], intel->gen); 2352 } 2353 printf("\n"); 2354 } 2355 2356 c->prog_data.total_grf = v.grf_used; 2357 c->prog_data.total_scratch = 0; 2358 2359 return GL_TRUE; 2360} 2361