brw_fs.cpp revision 4fb0c92c6986cf4e88296bab8837320210f1794f
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "talloc.h"
}
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"

/* Register files used by fs_reg.  The first four mirror the hardware
 * register file encodings; the rest are compiler-internal.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE
};

/* Virtual opcodes used during code generation, lowered to real SEND/math
 * instructions in generate_code().  Numbered from 256 to stay clear of
 * the hardware BRW_OPCODE_* space.
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};

/* -1 = not yet checked; set from the INTEL_NEW_FS environment variable
 * on first link (see brw_link_shader()).
 */
static int using_new_fs = -1;
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);

/**
 * GLcontext::Driver.NewShader hook: allocates the driver-subclassed
 * gl_shader (a talloc_zero'd brw_shader).
 */
struct gl_shader *
brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = talloc_zero(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   /* NOTE(review): if talloc_zero failed, &shader->base is computed from a
    * NULL pointer; callers appear to rely on base being the first member.
    */
   return &shader->base;
}

/**
 * GLcontext::Driver.NewShaderProgram hook: allocates the driver-subclassed
 * gl_shader_program.
 */
struct gl_shader_program *
brw_new_shader_program(GLcontext *ctx, GLuint name)
{
   struct brw_shader_program *prog;
   prog = talloc_zero(NULL, struct brw_shader_program);
   if (prog) {
      prog->base.Name = name;
      _mesa_init_shader_program(ctx, &prog->base);
   }
   return &prog->base;
}

GLboolean
brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
{
   /* Compilation proper is deferred; just run the core Mesa IR compile. */
   if (!_mesa_ir_compile_shader(ctx, shader))
      return GL_FALSE;

   return GL_TRUE;
}

/**
 * Link hook.  When the INTEL_NEW_FS environment variable is set, clones
 * each linked fragment shader's IR into brw_shader::ir and runs the
 * lowering passes this backend requires (scalarization, mod/div/sub
 * lowering, etc.), iterating the optimization loop to a fixed point.
 */
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
         void *mem_ctx = talloc_new(NULL);
         bool progress;

         /* Replace any previously lowered copy of the IR. */
         if (shader->ir)
            talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

         /* One-shot lowering passes. */
         do_mat_op_to_vec(shader->ir);
         do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);
         do_lower_texture_projection(shader->ir);

         /* Iterate scalarization + optimization until nothing changes. */
         do {
            progress = false;

            brw_do_channel_expressions(shader->ir);
            brw_do_vector_splitting(shader->ir);

            progress = do_lower_jumps(shader->ir, true, true,
                                      true, /* main return */
                                      false, /* continue */
                                      false /* loops */
                                      ) || progress;

            progress = do_common_optimization(shader->ir, true, 32) || progress;

            progress = lower_noise(shader->ir) || progress;
            progress =
               lower_variable_index_to_cond_assign(shader->ir,
                                                   GL_TRUE, /* input */
                                                   GL_TRUE, /* output */
                                                   GL_TRUE, /* temp */
                                                   GL_TRUE /* uniform */
                                                   ) || progress;
         } while (progress);

         validate_ir_tree(shader->ir);

         /* Move the lowered IR off the scratch context onto the shader. */
         reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}

/**
 * Returns the register size (in scalar float slots) needed to hold a
 * value of the given GLSL type.
 */
static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return type->components();
   case GLSL_TYPE_ARRAY:
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up no register space, since they're baked in at
       * link time.
       */
      return 0;
   default:
      assert(!"not reached");
      return 0;
   }
}

/**
 * A source or destination register operand for fs_inst, either a virtual
 * GRF, an MRF, an immediate, a uniform slot, or a fixed hardware register.
 */
class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Shared field setup used by all constructors.
    * NOTE(review): 'type' and 'file' are deliberately left for the
    * constructors to set; talloc_size does not zero the allocation, so a
    * constructor that skips them leaves those fields uninitialized.
    */
   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg Immediate value constructor.
    */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** virtual register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the virtual register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   bool negate;
   bool abs;
   struct brw_reg fixed_hw_reg;

   /** Value for file == BRW_IMMMEDIATE_FILE */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};

/* Convenience registers: an unset operand, and the architecture null reg. */
static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);

/**
 * One IR-level instruction in the FS backend, kept on
 * fs_visitor::instructions until generate_code() lowers it to EU code.
 */
class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors).
    */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      /* Zeroing allocation: fields not covered by init() (mlen, dst,
       * src[], ir, annotation) start out zero/NULL.
       */
      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Common field setup shared by all constructors. */
   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};

/**
 * IR-tree walker that translates the linked GLSL IR of a fragment shader
 * into fs_inst instructions, then register-allocates and generates the
 * final EU code.
 */
class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      this->variable_ht = hash_table_ctor(0,
                                          hash_table_pointer_hash,
                                          hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;

      this->virtual_grf_sizes = NULL;
      /* Virtual GRF numbering starts at 1; reg == 0 means "fixed hw reg". */
      this->virtual_grf_next = 1;
      this->virtual_grf_array_size = 0;
      this->virtual_grf_def = NULL;
      this->virtual_grf_use = NULL;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   fs_reg *variable_storage(ir_variable *var);
   int virtual_grf_alloc(int size);

   /* ir_visitor callbacks, one per IR node type. */
   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void calculate_urb_setup();
   void assign_urb_setup();
   void assign_regs();
   void assign_regs_trivial();
   void
 calculate_live_intervals();
   bool virtual_grf_interferes(int a, int b);
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst, struct brw_reg temp);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_fragcoord_interpolation(ir_variable *ir);
   void emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup_gen4();
   void emit_interpolation_setup_gen6();
   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
   void emit_fb_writes();
   void emit_assignment_writes(fs_reg &l, fs_reg &r,
                               const glsl_type *type, bool predicated);

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);
   void setup_builtin_uniform_values(ir_variable *ir);

   /* Compile-state handles shared with the rest of the WM compiler. */
   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;

   /* Per-virtual-GRF bookkeeping, indexed by fs_reg::reg. */
   int *virtual_grf_sizes;
   int virtual_grf_next;
   int virtual_grf_array_size;
   int *virtual_grf_def;
   int *virtual_grf_use;

   /* ir_variable -> fs_reg storage map. */
   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;
   int urb_setup[FRAG_ATTRIB_MAX];

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char
**annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   /* Interpolation setup registers produced by emit_interpolation_setup_*. */
   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;

   int grf_used;

};

/**
 * Allocates a new virtual GRF of the given size (in scalar slots) and
 * returns its register number.  The size table grows geometrically.
 */
int
fs_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_next) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
                                         int, virtual_grf_array_size);

      /* This slot is always unused. */
      virtual_grf_sizes[0] = 0;
   }
   virtual_grf_sizes[virtual_grf_next] = size;
   return virtual_grf_next++;
}

/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   this->type = BRW_REGISTER_TYPE_F;
}

/**
 * Maps a GLSL base type to the BRW register type used to hold it.
 */
int
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   default:
      assert(!"not reached");
      return BRW_REGISTER_TYPE_F;
   }
}

/** Automatic reg constructor.
 */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));
   this->reg_offset = 0;
   this->type = brw_type_for_base_type(type);
}

/** Returns the fs_reg previously assigned to \p var, or NULL. */
fs_reg *
fs_visitor::variable_storage(ir_variable *var)
{
   return (fs_reg *)hash_table_find(this->variable_ht, var);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      /* A matrix is handled as matrix_columns consecutive column vectors. */
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   /* Returns the number of parameter slots consumed at \p loc. */
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it.
*/ 645 return 1; 646 647 default: 648 assert(!"not reached"); 649 return 0; 650 } 651} 652 653 654/* Our support for builtin uniforms is even scarier than non-builtin. 655 * It sits on top of the PROG_STATE_VAR parameters that are 656 * automatically updated from GL context state. 657 */ 658void 659fs_visitor::setup_builtin_uniform_values(ir_variable *ir) 660{ 661 const struct gl_builtin_uniform_desc *statevar = NULL; 662 663 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { 664 statevar = &_mesa_builtin_uniform_desc[i]; 665 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) 666 break; 667 } 668 669 if (!statevar->name) { 670 this->fail = true; 671 printf("Failed to find builtin uniform `%s'\n", ir->name); 672 return; 673 } 674 675 int array_count; 676 if (ir->type->is_array()) { 677 array_count = ir->type->length; 678 } else { 679 array_count = 1; 680 } 681 682 for (int a = 0; a < array_count; a++) { 683 for (unsigned int i = 0; i < statevar->num_elements; i++) { 684 struct gl_builtin_uniform_element *element = &statevar->elements[i]; 685 int tokens[STATE_LENGTH]; 686 687 memcpy(tokens, element->tokens, sizeof(element->tokens)); 688 if (ir->type->is_array()) { 689 tokens[1] = a; 690 } 691 692 /* This state reference has already been setup by ir_to_mesa, 693 * but we'll get the same index back here. 694 */ 695 int index = _mesa_add_state_reference(this->fp->Base.Parameters, 696 (gl_state_index *)tokens); 697 float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; 698 699 /* Add each of the unique swizzles of the element as a 700 * parameter. This'll end up matching the expected layout of 701 * the array/matrix/structure we're trying to fill in. 
702 */ 703 int last_swiz = -1; 704 for (unsigned int i = 0; i < 4; i++) { 705 int swiz = GET_SWZ(element->swizzle, i); 706 if (swiz == last_swiz) 707 break; 708 last_swiz = swiz; 709 710 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; 711 } 712 } 713 } 714} 715 716void 717fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 718{ 719 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 720 fs_reg wpos = *reg; 721 fs_reg neg_y = this->pixel_y; 722 neg_y.negate = true; 723 724 /* gl_FragCoord.x */ 725 if (ir->pixel_center_integer) { 726 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 727 } else { 728 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 729 } 730 wpos.reg_offset++; 731 732 /* gl_FragCoord.y */ 733 if (ir->origin_upper_left && ir->pixel_center_integer) { 734 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 735 } else { 736 fs_reg pixel_y = this->pixel_y; 737 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 738 739 if (!ir->origin_upper_left) { 740 pixel_y.negate = true; 741 offset += c->key.drawable_height - 1.0; 742 } 743 744 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 745 } 746 wpos.reg_offset++; 747 748 /* gl_FragCoord.z */ 749 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 750 interp_reg(FRAG_ATTRIB_WPOS, 2))); 751 wpos.reg_offset++; 752 753 /* gl_FragCoord.w: Already set up in emit_interpolation */ 754 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 755 756 hash_table_insert(this->variable_ht, reg, ir); 757} 758 759 760void 761fs_visitor::emit_general_interpolation(ir_variable *ir) 762{ 763 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 764 /* Interpolation is always in floating point regs. 
    */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
         /* Unsized arrays are not expected here; flag the compile. */
         this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
         if (urb_setup[location] == -1) {
            /* If there's no incoming setup data for this slot, don't
             * emit interpolation for it.
             */
            attr.reg_offset += type->vector_elements;
            location++;
            continue;
         }

         /* Interpolate each channel of this vec4 slot... */
         for (unsigned int c = 0; c < type->vector_elements; c++) {
            struct brw_reg interp = interp_reg(location, c);
            emit(fs_inst(FS_OPCODE_LINTERP,
                         attr,
                         this->delta_x,
                         this->delta_y,
                         fs_reg(interp)));
            attr.reg_offset++;
         }
         attr.reg_offset -= type->vector_elements;

         /* ...then apply the perspective divide (pixel_w = 1/w). */
         for (unsigned int c = 0; c < type->vector_elements; c++) {
            emit(fs_inst(BRW_OPCODE_MUL,
                         attr,
                         attr,
                         this->pixel_w));
            attr.reg_offset++;
         }
         location++;
      }
   }

   hash_table_insert(this->variable_ht, reg, ir);
}

/**
 * Allocates (or specially handles) storage for a declared variable and
 * records it in variable_ht.  Builtin outputs are only noted; inputs get
 * interpolation emitted; uniforms get parameter slots set up.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
         emit_fragcoord_interpolation(ir);
         return;
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
         reg = new(this->mem_ctx) fs_reg(this, ir->type);
         struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
         /* bit
 31 is "primitive is back face", so checking < (1 << 31) gives
          * us front face
          */
         fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
                                      *reg,
                                      fs_reg(r1_6ud),
                                      fs_reg(1u << 31)));
         inst->conditional_mod = BRW_CONDITIONAL_L;
         /* Reduce the all-ones CMP result to a 0/1 boolean. */
         emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
         emit_general_interpolation(ir);
         return;
      }
   }

   if (ir->mode == ir_var_uniform) {
      /* Remember where this uniform's values start in prog_data.param. */
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}

void
fs_visitor::visit(ir_dereference_variable *ir)
{
   fs_reg *reg = variable_storage(ir->var);
   this->result = *reg;
}

void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   /* Accumulate the register offset of the named field within the struct. */
   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}

void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      /* Constant index: just bump the register offset. */
      assert(this->result.file == UNIFORM ||
             (this->result.file == GRF &&
              this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}

/**
 * Translates a (scalarized) expression into fs_inst instructions, leaving
 * the value in this->result.  Matrix and vector operands must already have
 * been lowered by the channel-expression/vector-splitting passes.
 */
void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[2], temp;
   fs_reg result;
   fs_inst *inst;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         ir_print_visitor v;
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
         this->fail = true;
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* NOTE(review): ADD -1 maps 1 -> 0 but 0 -> -1 (not 1); presumably the
       * all-bits boolean form is acceptable downstream — verify.
       */
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
      break;
   case ir_unop_neg:
      /* Negation/abs are free source modifiers; no instruction emitted. */
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      this->result = op[0];
      break;
   case ir_unop_sign:
      /* sign(x): start at 0, predicate-overwrite with 1.0 or -1.0. */
      temp = fs_reg(this, ir->type);

      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
      inst->predicated = true;

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
      inst->predicated = true;

      break;
   case ir_unop_rcp:
      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
      break;

   case
 ir_unop_exp2:
      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
      break;
   case ir_unop_log2:
      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
      break;
   case ir_unop_cos:
      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
      break;

   case ir_unop_dFdx:
      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
      break;
   case ir_unop_dFdy:
      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
      break;

   case ir_binop_add:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   /* Comparisons: CMP sets all-ones on true; AND 0x1 normalizes to 0/1. */
   case ir_binop_less:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_greater:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_lequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_LE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_gequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0],
op[1])); 1043 inst->conditional_mod = BRW_CONDITIONAL_GE; 1044 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1045 break; 1046 case ir_binop_equal: 1047 case ir_binop_all_equal: /* same as nequal for scalars */ 1048 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1049 inst->conditional_mod = BRW_CONDITIONAL_Z; 1050 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1051 break; 1052 case ir_binop_nequal: 1053 case ir_binop_any_nequal: /* same as nequal for scalars */ 1054 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1055 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1056 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 1057 break; 1058 1059 case ir_binop_logic_xor: 1060 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 1061 break; 1062 1063 case ir_binop_logic_or: 1064 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 1065 break; 1066 1067 case ir_binop_logic_and: 1068 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 1069 break; 1070 1071 case ir_binop_dot: 1072 case ir_binop_cross: 1073 case ir_unop_any: 1074 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 1075 break; 1076 1077 case ir_unop_noise: 1078 assert(!"not reached: should be handled by lower_noise"); 1079 break; 1080 1081 case ir_unop_sqrt: 1082 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0])); 1083 break; 1084 1085 case ir_unop_rsq: 1086 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0])); 1087 break; 1088 1089 case ir_unop_i2f: 1090 case ir_unop_b2f: 1091 case ir_unop_b2i: 1092 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 1093 break; 1094 case ir_unop_f2i: 1095 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 1096 break; 1097 case ir_unop_f2b: 1098 case ir_unop_i2b: 1099 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 1100 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1101 1102 case ir_unop_trunc: 1103 
emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1104 break; 1105 case ir_unop_ceil: 1106 op[0].negate = ~op[0].negate; 1107 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1108 this->result.negate = true; 1109 break; 1110 case ir_unop_floor: 1111 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1112 break; 1113 case ir_unop_fract: 1114 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 1115 break; 1116 1117 case ir_binop_min: 1118 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1119 inst->conditional_mod = BRW_CONDITIONAL_L; 1120 1121 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1122 inst->predicated = true; 1123 break; 1124 case ir_binop_max: 1125 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1126 inst->conditional_mod = BRW_CONDITIONAL_G; 1127 1128 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1129 inst->predicated = true; 1130 break; 1131 1132 case ir_binop_pow: 1133 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1])); 1134 break; 1135 1136 case ir_unop_bit_not: 1137 case ir_unop_u2f: 1138 case ir_binop_lshift: 1139 case ir_binop_rshift: 1140 case ir_binop_bit_and: 1141 case ir_binop_bit_xor: 1142 case ir_binop_bit_or: 1143 assert(!"GLSL 1.30 features unsupported"); 1144 break; 1145 } 1146} 1147 1148void 1149fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, 1150 const glsl_type *type, bool predicated) 1151{ 1152 switch (type->base_type) { 1153 case GLSL_TYPE_FLOAT: 1154 case GLSL_TYPE_UINT: 1155 case GLSL_TYPE_INT: 1156 case GLSL_TYPE_BOOL: 1157 for (unsigned int i = 0; i < type->components(); i++) { 1158 l.type = brw_type_for_base_type(type); 1159 r.type = brw_type_for_base_type(type); 1160 1161 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1162 inst->predicated = predicated; 1163 1164 l.reg_offset++; 1165 r.reg_offset++; 1166 } 1167 break; 1168 case GLSL_TYPE_ARRAY: 1169 for (unsigned int i = 0; i < type->length; i++) { 
1170 emit_assignment_writes(l, r, type->fields.array, predicated); 1171 } 1172 1173 case GLSL_TYPE_STRUCT: 1174 for (unsigned int i = 0; i < type->length; i++) { 1175 emit_assignment_writes(l, r, type->fields.structure[i].type, 1176 predicated); 1177 } 1178 break; 1179 1180 case GLSL_TYPE_SAMPLER: 1181 break; 1182 1183 default: 1184 assert(!"not reached"); 1185 break; 1186 } 1187} 1188 1189void 1190fs_visitor::visit(ir_assignment *ir) 1191{ 1192 struct fs_reg l, r; 1193 fs_inst *inst; 1194 1195 /* FINISHME: arrays on the lhs */ 1196 ir->lhs->accept(this); 1197 l = this->result; 1198 1199 ir->rhs->accept(this); 1200 r = this->result; 1201 1202 assert(l.file != BAD_FILE); 1203 assert(r.file != BAD_FILE); 1204 1205 if (ir->condition) { 1206 /* Get the condition bool into the predicate. */ 1207 ir->condition->accept(this); 1208 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0))); 1209 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1210 } 1211 1212 if (ir->lhs->type->is_scalar() || 1213 ir->lhs->type->is_vector()) { 1214 for (int i = 0; i < ir->lhs->type->vector_elements; i++) { 1215 if (ir->write_mask & (1 << i)) { 1216 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1217 if (ir->condition) 1218 inst->predicated = true; 1219 r.reg_offset++; 1220 } 1221 l.reg_offset++; 1222 } 1223 } else { 1224 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); 1225 } 1226} 1227 1228fs_inst * 1229fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) 1230{ 1231 int mlen; 1232 int base_mrf = 2; 1233 bool simd16 = false; 1234 fs_reg orig_dst; 1235 1236 if (ir->shadow_comparitor) { 1237 for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { 1238 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), 1239 coordinate)); 1240 coordinate.reg_offset++; 1241 } 1242 /* gen4's SIMD8 sampler always has the slots for u,v,r present. 
       */
      mlen = 3;

      if (ir->op == ir_tex) {
	 /* There's no plain shadow compare message, so we use shadow
	  * compare with a bias of 0.0.
	  */
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      fs_reg(0.0f)));
	 mlen++;
      } else if (ir->op == ir_txb) {
	 ir->lod_info.bias->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      } else {
	 assert(ir->op == ir_txl);
	 ir->lod_info.lod->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      }

      /* The shadow reference value goes in the last slot. */
      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   } else if (ir->op == ir_tex) {
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      coordinate));
	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen = 3;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      assert(ir->op == ir_txb || ir->op == ir_txl);

      /* SIMD16 message payload: each parameter takes two MRF slots, and
       * only the even (lower-half) slot is written here.
       */
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements * 2;) {
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      coordinate));
	 coordinate.reg_offset++;
	 mlen++;

	 /* The unused upper half. */
	 mlen++;
      }

      /* lod/bias appears after u/v/r. */
      mlen = 6;

      if (ir->op == ir_txb) {
	 ir->lod_info.bias->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      } else {
	 ir->lod_info.lod->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      }

      /* The unused upper half.
       */
      mlen++;

      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk.  We'll need to move
       * this weirdness around to the expected layout.
       */
      simd16 = true;
      orig_dst = dst;
      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
						       2));
      dst.type = BRW_REGISTER_TYPE_F;
   }

   /* Emit the actual sample instruction now that the payload is built.
    * NOTE(review): ir_txd/ir_txf leave inst NULL and would crash on the
    * inst->mlen below if the assert were compiled out.
    */
   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->mlen = mlen;

   /* Unshuffle the SIMD16 return payload back into the caller's dst. */
   if (simd16) {
      for (int i = 0; i < 4; i++) {
	 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
	 orig_dst.reg_offset++;
	 dst.reg_offset += 2;
      }
   }

   return inst;
}

/**
 * Emits the gen5 sampler message for a texture operation and returns the
 * emitted instruction.
 */
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
    * optional parameters like shadow comparitor or LOD bias.  If
    * optional parameters aren't present, those base slots are
    * optional and don't need to be included in the message.
    *
    * We don't fill in the unnecessary slots regardless, which may
    * look surprising in the disassembly.
    */
   int mlen;
   int base_mrf = 2;

   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   if (ir->shadow_comparitor) {
      /* Shadow reference goes after the four base slots. */
      mlen = MAX2(mlen, 4);

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      mlen = MAX2(mlen, 4);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      mlen = MAX2(mlen, 4);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->mlen = mlen;

   return inst;
}

/**
 * Visits a texture IR node: builds the per-generation sampler message,
 * resolves the sampler unit, and applies any surface-format swizzle from
 * the WM key.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   fs_inst *inst = NULL;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   if (intel->gen < 5) {
      inst = emit_texture_gen4(ir, dst, coordinate);
   } else {
      inst = emit_texture_gen5(ir, dst, coordinate);
   }

   /* Map the GLSL sampler uniform to the actual texture unit. */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
				      ctx->Shader.CurrentProgram,
				      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   /* Apply the WM key's texture swizzle (e.g. for depth textures) with
    * per-channel MOVs into a fresh destination.
    */
   if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
      fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
	 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
	 fs_reg l = swizzle_dst;
	 l.reg_offset += i;

	 if (swiz == SWIZZLE_ZERO) {
	    emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
	 } else if (swiz == SWIZZLE_ONE) {
	    emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
	 } else {
	    fs_reg r = dst;
	    r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
	    emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 }
      }
      this->result = swizzle_dst;
   }
}

/**
 * Visits a swizzle node.  A single-component swizzle is just an offset
 * into the source; otherwise each result channel gets a MOV from the
 * selected source channel.
 */
void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
	 swiz = ir->mask.x;
	 break;
      case 1:
	 swiz = ir->mask.y;
	 break;
      case 2:
	 swiz = ir->mask.z;
	 break;
      case 3:
	 swiz = ir->mask.w;
	 break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

void
fs_visitor::visit(ir_discard *ir)
{
   /* Scratch register for the discard opcode's mask computation. */
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD, temp, temp));
}

/**
 * Visits a constant: materializes it with one immediate MOV per component
 * into a fresh register.
 */
void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
	 break;
      case GLSL_TYPE_UINT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
	 break;
      case GLSL_TYPE_INT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
	 break;
      case GLSL_TYPE_BOOL:
	 /* Booleans are stored as integer 0/1. */
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
	 break;
      default:
	 assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

/**
 * Visits an if statement: evaluates the condition into the flag register
 * and emits a predicated IF/ELSE/ENDIF structure around the branches.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code.
    */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

/**
 * Visits a loop: emits DO/WHILE with an optional counter (initialization,
 * bound comparison + predicated BREAK, and increment) for counted loops.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
	 this->base_ir = ir->from;
	 ir->from->accept(this);

	 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      /* Compare the counter against the bound and break out of the loop
       * when the (inverted at codegen via predication) condition holds.
       */
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
				   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;
      case ir_binop_nequal:
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;
      case ir_binop_gequal:
	 inst->conditional_mod = BRW_CONDITIONAL_GE;
	 break;
      case ir_binop_lequal:
	 inst->conditional_mod = BRW_CONDITIONAL_LE;
	 break;
      case ir_binop_greater:
	 inst->conditional_mod = BRW_CONDITIONAL_G;
	 break;
      case ir_binop_less:
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 break;
      default:
	 assert(!"not reached: unknown loop condition");
	 this->fail = true;
	 break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   /* Calls should have been inlined before reaching the backend. */
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
1679 */ 1680 if (strcmp(ir->name, "main") == 0) { 1681 const ir_function_signature *sig; 1682 exec_list empty; 1683 1684 sig = ir->matching_signature(&empty); 1685 1686 assert(sig); 1687 1688 foreach_iter(exec_list_iterator, iter, sig->body) { 1689 ir_instruction *ir = (ir_instruction *)iter.get(); 1690 this->base_ir = ir; 1691 1692 ir->accept(this); 1693 } 1694 } 1695} 1696 1697void 1698fs_visitor::visit(ir_function_signature *ir) 1699{ 1700 assert(!"not reached"); 1701 (void)ir; 1702} 1703 1704fs_inst * 1705fs_visitor::emit(fs_inst inst) 1706{ 1707 fs_inst *list_inst = new(mem_ctx) fs_inst; 1708 *list_inst = inst; 1709 1710 list_inst->annotation = this->current_annotation; 1711 list_inst->ir = this->base_ir; 1712 1713 this->instructions.push_tail(list_inst); 1714 1715 return list_inst; 1716} 1717 1718/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1719void 1720fs_visitor::emit_dummy_fs() 1721{ 1722 /* Everyone's favorite color. */ 1723 emit(fs_inst(BRW_OPCODE_MOV, 1724 fs_reg(MRF, 2), 1725 fs_reg(1.0f))); 1726 emit(fs_inst(BRW_OPCODE_MOV, 1727 fs_reg(MRF, 3), 1728 fs_reg(0.0f))); 1729 emit(fs_inst(BRW_OPCODE_MOV, 1730 fs_reg(MRF, 4), 1731 fs_reg(1.0f))); 1732 emit(fs_inst(BRW_OPCODE_MOV, 1733 fs_reg(MRF, 5), 1734 fs_reg(0.0f))); 1735 1736 fs_inst *write; 1737 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1738 fs_reg(0), 1739 fs_reg(0))); 1740} 1741 1742/* The register location here is relative to the start of the URB 1743 * data. It will get adjusted to be a real location before 1744 * generate_code() time. 1745 */ 1746struct brw_reg 1747fs_visitor::interp_reg(int location, int channel) 1748{ 1749 int regnr = urb_setup[location] * 2 + channel / 2; 1750 int stride = (channel & 1) * 4; 1751 1752 assert(urb_setup[location] != -1); 1753 1754 return brw_vec1_grf(regnr, stride); 1755} 1756 1757/** Emits the interpolation for the varying inputs. 
*/ 1758void 1759fs_visitor::emit_interpolation_setup_gen4() 1760{ 1761 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1762 1763 this->current_annotation = "compute pixel centers"; 1764 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1765 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1766 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1767 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1768 emit(fs_inst(BRW_OPCODE_ADD, 1769 this->pixel_x, 1770 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1771 fs_reg(brw_imm_v(0x10101010)))); 1772 emit(fs_inst(BRW_OPCODE_ADD, 1773 this->pixel_y, 1774 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1775 fs_reg(brw_imm_v(0x11001100)))); 1776 1777 this->current_annotation = "compute pixel deltas from v0"; 1778 if (brw->has_pln) { 1779 this->delta_x = fs_reg(this, glsl_type::vec2_type); 1780 this->delta_y = this->delta_x; 1781 this->delta_y.reg_offset++; 1782 } else { 1783 this->delta_x = fs_reg(this, glsl_type::float_type); 1784 this->delta_y = fs_reg(this, glsl_type::float_type); 1785 } 1786 emit(fs_inst(BRW_OPCODE_ADD, 1787 this->delta_x, 1788 this->pixel_x, 1789 fs_reg(negate(brw_vec1_grf(1, 0))))); 1790 emit(fs_inst(BRW_OPCODE_ADD, 1791 this->delta_y, 1792 this->pixel_y, 1793 fs_reg(negate(brw_vec1_grf(1, 1))))); 1794 1795 this->current_annotation = "compute pos.w and 1/pos.w"; 1796 /* Compute wpos.w. It's always in our setup, since it's needed to 1797 * interpolate the other attributes. 1798 */ 1799 this->wpos_w = fs_reg(this, glsl_type::float_type); 1800 emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, 1801 interp_reg(FRAG_ATTRIB_WPOS, 3))); 1802 /* Compute the pixel 1/W value from wpos.w. */ 1803 this->pixel_w = fs_reg(this, glsl_type::float_type); 1804 emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w)); 1805 this->current_annotation = NULL; 1806} 1807 1808/** Emits the interpolation for the varying inputs. 
*/ 1809void 1810fs_visitor::emit_interpolation_setup_gen6() 1811{ 1812 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1813 1814 /* If the pixel centers end up used, the setup is the same as for gen4. */ 1815 this->current_annotation = "compute pixel centers"; 1816 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1817 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1818 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1819 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1820 emit(fs_inst(BRW_OPCODE_ADD, 1821 this->pixel_x, 1822 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1823 fs_reg(brw_imm_v(0x10101010)))); 1824 emit(fs_inst(BRW_OPCODE_ADD, 1825 this->pixel_y, 1826 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1827 fs_reg(brw_imm_v(0x11001100)))); 1828 1829 this->current_annotation = "compute 1/pos.w"; 1830 this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); 1831 this->pixel_w = fs_reg(this, glsl_type::float_type); 1832 emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w)); 1833 1834 this->delta_x = fs_reg(brw_vec8_grf(2, 0)); 1835 this->delta_y = fs_reg(brw_vec8_grf(3, 0)); 1836 1837 this->current_annotation = NULL; 1838} 1839 1840void 1841fs_visitor::emit_fb_writes() 1842{ 1843 this->current_annotation = "FB write header"; 1844 int nr = 0; 1845 1846 /* m0, m1 header */ 1847 nr += 2; 1848 1849 if (c->key.aa_dest_stencil_reg) { 1850 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1851 fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); 1852 } 1853 1854 /* Reserve space for color. It'll be filled in per MRT below. */ 1855 int color_mrf = nr; 1856 nr += 4; 1857 1858 if (c->key.source_depth_to_render_target) { 1859 if (c->key.computes_depth) { 1860 /* Hand over gl_FragDepth. */ 1861 assert(this->frag_depth); 1862 fs_reg depth = *(variable_storage(this->frag_depth)); 1863 1864 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 1865 } else { 1866 /* Pass through the payload depth. 
*/ 1867 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1868 fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); 1869 } 1870 } 1871 1872 if (c->key.dest_depth_reg) { 1873 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1874 fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); 1875 } 1876 1877 fs_reg color = reg_undef; 1878 if (this->frag_color) 1879 color = *(variable_storage(this->frag_color)); 1880 else if (this->frag_data) 1881 color = *(variable_storage(this->frag_data)); 1882 1883 for (int target = 0; target < c->key.nr_color_regions; target++) { 1884 this->current_annotation = talloc_asprintf(this->mem_ctx, 1885 "FB write target %d", 1886 target); 1887 if (this->frag_color || this->frag_data) { 1888 for (int i = 0; i < 4; i++) { 1889 emit(fs_inst(BRW_OPCODE_MOV, 1890 fs_reg(MRF, color_mrf + i), 1891 color)); 1892 color.reg_offset++; 1893 } 1894 } 1895 1896 if (this->frag_color) 1897 color.reg_offset -= 4; 1898 1899 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1900 reg_undef, reg_undef)); 1901 inst->target = target; 1902 inst->mlen = nr; 1903 if (target == c->key.nr_color_regions - 1) 1904 inst->eot = true; 1905 } 1906 1907 if (c->key.nr_color_regions == 0) { 1908 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1909 reg_undef, reg_undef)); 1910 inst->mlen = nr; 1911 inst->eot = true; 1912 } 1913 1914 this->current_annotation = NULL; 1915} 1916 1917void 1918fs_visitor::generate_fb_write(fs_inst *inst) 1919{ 1920 GLboolean eot = inst->eot; 1921 1922 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 1923 * move, here's g1. 
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
	   brw_message_reg(1),
	   brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
		8, /* dispatch_width */
		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
		0, /* base MRF */
		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
		inst->target,
		inst->mlen,
		0,
		eot);
}

/**
 * Generates code for FS_OPCODE_LINTERP: uses PLN when the hardware has it
 * and the deltas are laid out suitably, otherwise falls back to LINE+MAC.
 */
void
fs_visitor::generate_linterp(fs_inst *inst,
			     struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   /* PLN requires delta_y immediately after delta_x, and (pre-gen6) the
    * pair to start on an even register.
    */
   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

/**
 * Generates a math-box instruction for the transcendental FS opcodes.
 * POW's second operand is passed via message register 3.
 */
void
fs_visitor::generate_math(fs_inst *inst,
			  struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   if (inst->opcode == FS_OPCODE_POW) {
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   brw_math(p, dst,
	    op,
	    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
	    BRW_MATH_SATURATE_NONE,
	    2, src[0],
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);
}

/**
 * Generates the SEND for a sampler message built by emit_texture_gen4/5,
 * picking the message type from the opcode, shadow mode, and generation.
 */
void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

   if (intel->gen == 5) {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
	 }
	 break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
	 /* Note that G45 and older determines shadow compare and dispatch width
	  * from message length for most messages.
	  */
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 5);
	 } else {
	    assert(inst->mlen <= 6);
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 5);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
	 } else {
	    assert(inst->mlen == 8);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 }
	 break;
      }
   }
   /* NOTE(review): FS_OPCODE_TXL is not handled in either switch above and
    * would trip this assert -- confirm TXL never reaches here on these gens.
    */
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      rlen = 8;
      dst = vec16(dst);
   }

   /* g0 header.
    */
   src.nr--;

   brw_SAMPLE(p,
	      retype(dst, BRW_REGISTER_TYPE_UW),
	      src.nr,
	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
	      SURF_INDEX_TEXTURE(inst->sampler),
	      inst->sampler,
	      WRITEMASK_XYZW,
	      msg_type,
	      rlen,
	      inst->mlen + 1,
	      0,
	      1,
	      simd_mode);
}


/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *           DDX                     DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But for DDY, it's harder, as we want to produce the pairs swizzled
 * between each other.  We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/**
 * Generates code for FS_OPCODE_DISCARD: clears the discarded channels out
 * of the execution mask stored in g0.
 */
void
fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   temp = brw_uw1_reg(temp.file, temp.nr, 0);

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, temp, g0);
   brw_pop_insn_state(p);
}

/**
 * Rewrites UNIFORM-file sources to the fixed hardware registers holding
 * the pushed constants (CURBE).
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   /* Map the offsets in the UNIFORM file to fixed HW regs.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
	 if (inst->src[i].file == UNIFORM) {
	    /* 8 constants per GRF, addressed by hw_reg + reg_offset. */
	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
						  constant_nr / 8,
						  constant_nr % 8);

	    inst->src[i].file = FIXED_HW_REG;
	    inst->src[i].fixed_hw_reg = brw_reg;
	 }
      }
   }
}

/**
 * Decides which URB slot each fragment input attribute occupies and
 * records the total setup read length.
 */
void
fs_visitor::calculate_urb_setup()
{
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      urb_setup[i] = -1;
   }

   int urb_next = 0;
   /* Figure out where each of the incoming setup attributes lands. */
   if (intel->gen >= 6) {
      for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
	 if (i == FRAG_ATTRIB_WPOS ||
	     (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i))) {
	    urb_setup[i] = urb_next++;
	 }
      }
   } else {
      /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
      for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
	 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
	    int fp_index;

	    if (i >= VERT_RESULT_VAR0)
	       fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
	    else if (i <= VERT_RESULT_TEX7)
	       fp_index = i;
	    else
	       fp_index = -1;

	    if (fp_index >= 0)
	       urb_setup[fp_index] = urb_next++;
	 }
      }
   }

   /* Each attribute is 4 setup channels, each of which is half a reg. */
   c->prog_data.urb_read_length = urb_next * 2;
}

/**
 * Converts the relative URB offsets baked into LINTERP instructions into
 * absolute GRF numbers, now that CURBE size is known.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;

   /* Offset all the urb_setup[] index by the actual position of the
    * setup regs, now that the location of the constants has been chosen.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
	 continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      inst->src[2].fixed_hw_reg.nr += urb_start;
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}

/* Rewrites a virtual GRF reference to its assigned hardware register. */
static void
assign_reg(int *reg_hw_locations, fs_reg *reg)
{
   if (reg->file == GRF && reg->reg != 0) {
      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
      reg->reg = 0;
   }
}

/**
 * Fallback allocator: packs every virtual GRF contiguously after the
 * payload, with no liveness analysis or reuse.
 *
 * NOTE(review): only dst/src[0]/src[1] are remapped here (and in
 * assign_regs below); src[2] appears safe because LINTERP's src[2] is
 * always FIXED_HW_REG by this point (see assign_urb_setup) -- confirm.
 */
void
fs_visitor::assign_regs_trivial()
{
   int last_grf = 0;
   int hw_reg_mapping[this->virtual_grf_next];
   int i;

   hw_reg_mapping[0] = 0;
   hw_reg_mapping[1] = this->first_non_payload_grf;
   for (i = 2; i < this->virtual_grf_next; i++) {
      hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
			   this->virtual_grf_sizes[i - 1]);
   }
   last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      assign_reg(hw_reg_mapping, &inst->dst);
      assign_reg(hw_reg_mapping, &inst->src[0]);
      assign_reg(hw_reg_mapping, &inst->src[1]);
   }

   this->grf_used = last_grf + 1;
}

/**
 * Graph-coloring register allocator: builds size-based register classes
 * (plus an aligned-pair class for gen5 PLN), adds conflicts between
 * overlapping class registers, and colors the interference graph.
 */
void
fs_visitor::assign_regs()
{
   int last_grf = 0;
   int hw_reg_mapping[this->virtual_grf_next + 1];
   int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
   int class_sizes[base_reg_count];
   int class_count = 0;
   int aligned_pair_class = -1;

   calculate_live_intervals();

   /* Set up the register classes.
    *
    * The base registers store a scalar value.  For texture samples,
    * we get virtual GRFs composed of 4 contiguous hw register.  For
    * structures and arrays, we store them as contiguous larger things
    * than that, though we should be able to do better most of the
    * time.
    */
   class_sizes[class_count++] = 1;
   if (brw->has_pln && intel->gen < 6) {
      /* Always set up the (unaligned) pairs for gen5, so we can find
       * them for making the aligned pair class.
       */
      class_sizes[class_count++] = 2;
   }
   /* Collect every distinct virtual-GRF size into its own class. */
   for (int r = 1; r < this->virtual_grf_next; r++) {
      int i;

      for (i = 0; i < class_count; i++) {
	 if (class_sizes[i] == this->virtual_grf_sizes[r])
	    break;
      }
      if (i == class_count) {
	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
	    fprintf(stderr, "Object too large to register allocate.\n");
	    this->fail = true;
	 }

	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
      }
   }

   int ra_reg_count = 0;
   int class_base_reg[class_count];
   int class_reg_count[class_count];
   int classes[class_count + 1];

   for (int i = 0; i < class_count; i++) {
      class_base_reg[i] = ra_reg_count;
      class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
      ra_reg_count += class_reg_count[i];
   }

   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
   for (int i = 0; i < class_count; i++) {
      classes[i] = ra_alloc_reg_class(regs);

      for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
	 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
      }

      /* Add conflicts between our contiguous registers aliasing
       * base regs and other register classes' contiguous registers
       * that alias base regs, or the base regs themselves for classes[0].
       */
      for (int c = 0; c <= i; c++) {
	 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
	    for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
		 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
		 c_r++) {

	       if (0) {
		  printf("%d/%d conflicts %d/%d\n",
			 class_sizes[i], this->first_non_payload_grf + i_r,
			 class_sizes[c], this->first_non_payload_grf + c_r);
	       }

	       ra_add_reg_conflict(regs,
				   class_base_reg[i] + i_r,
				   class_base_reg[c] + c_r);
	    }
	 }
      }
   }

   /* Add a special class for aligned pairs, which we'll put delta_x/y
    * in on gen5 so that we can do PLN.
    */
   if (brw->has_pln && intel->gen < 6) {
      int reg_count = (base_reg_count - 1) / 2;
      int unaligned_pair_class = 1;
      assert(class_sizes[unaligned_pair_class] == 2);

      aligned_pair_class = class_count;
      classes[aligned_pair_class] = ra_alloc_reg_class(regs);
      class_base_reg[aligned_pair_class] = 0;
      class_reg_count[aligned_pair_class] = 0;
      int start = (this->first_non_payload_grf & 1) ? 1 : 0;

      for (int i = 0; i < reg_count; i++) {
	 ra_class_add_reg(regs, classes[aligned_pair_class],
			  class_base_reg[unaligned_pair_class] + i * 2 + start);
      }
      class_count++;
   }

   ra_set_finalize(regs);

   struct ra_graph *g = ra_alloc_interference_graph(regs,
						    this->virtual_grf_next);
   /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
    * with nodes.
2396 */ 2397 ra_set_node_class(g, 0, classes[0]); 2398 2399 for (int i = 1; i < this->virtual_grf_next; i++) { 2400 for (int c = 0; c < class_count; c++) { 2401 if (class_sizes[c] == this->virtual_grf_sizes[i]) { 2402 if (aligned_pair_class >= 0 && 2403 this->delta_x.reg == i) { 2404 ra_set_node_class(g, i, classes[aligned_pair_class]); 2405 } else { 2406 ra_set_node_class(g, i, classes[c]); 2407 } 2408 break; 2409 } 2410 } 2411 2412 for (int j = 1; j < i; j++) { 2413 if (virtual_grf_interferes(i, j)) { 2414 ra_add_node_interference(g, i, j); 2415 } 2416 } 2417 } 2418 2419 /* FINISHME: Handle spilling */ 2420 if (!ra_allocate_no_spills(g)) { 2421 fprintf(stderr, "Failed to allocate registers.\n"); 2422 this->fail = true; 2423 return; 2424 } 2425 2426 /* Get the chosen virtual registers for each node, and map virtual 2427 * regs in the register classes back down to real hardware reg 2428 * numbers. 2429 */ 2430 hw_reg_mapping[0] = 0; /* unused */ 2431 for (int i = 1; i < this->virtual_grf_next; i++) { 2432 int reg = ra_get_node_reg(g, i); 2433 int hw_reg = -1; 2434 2435 for (int c = 0; c < class_count; c++) { 2436 if (reg >= class_base_reg[c] && 2437 reg < class_base_reg[c] + class_reg_count[c]) { 2438 hw_reg = reg - class_base_reg[c]; 2439 break; 2440 } 2441 } 2442 2443 assert(hw_reg != -1); 2444 hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg; 2445 last_grf = MAX2(last_grf, 2446 hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); 2447 } 2448 2449 foreach_iter(exec_list_iterator, iter, this->instructions) { 2450 fs_inst *inst = (fs_inst *)iter.get(); 2451 2452 assign_reg(hw_reg_mapping, &inst->dst); 2453 assign_reg(hw_reg_mapping, &inst->src[0]); 2454 assign_reg(hw_reg_mapping, &inst->src[1]); 2455 } 2456 2457 this->grf_used = last_grf + 1; 2458 2459 talloc_free(g); 2460 talloc_free(regs); 2461} 2462 2463void 2464fs_visitor::calculate_live_intervals() 2465{ 2466 int num_vars = this->virtual_grf_next; 2467 int *def = talloc_array(mem_ctx, int, num_vars); 
2468 int *use = talloc_array(mem_ctx, int, num_vars); 2469 int loop_depth = 0; 2470 int loop_start = 0; 2471 2472 for (int i = 0; i < num_vars; i++) { 2473 def[i] = 1 << 30; 2474 use[i] = 0; 2475 } 2476 2477 int ip = 0; 2478 foreach_iter(exec_list_iterator, iter, this->instructions) { 2479 fs_inst *inst = (fs_inst *)iter.get(); 2480 2481 if (inst->opcode == BRW_OPCODE_DO) { 2482 if (loop_depth++ == 0) 2483 loop_start = ip; 2484 } else if (inst->opcode == BRW_OPCODE_WHILE) { 2485 loop_depth--; 2486 2487 if (loop_depth == 0) { 2488 /* FINISHME: 2489 * 2490 * Patches up any vars marked for use within the loop as 2491 * live until the end. This is conservative, as there 2492 * will often be variables defined and used inside the 2493 * loop but dead at the end of the loop body. 2494 */ 2495 for (int i = 0; i < num_vars; i++) { 2496 if (use[i] == loop_start) { 2497 use[i] = ip; 2498 } 2499 } 2500 } 2501 } else { 2502 int eip = ip; 2503 2504 if (loop_depth) 2505 eip = loop_start; 2506 2507 for (unsigned int i = 0; i < 3; i++) { 2508 if (inst->src[i].file == GRF && inst->src[i].reg != 0) { 2509 def[inst->src[i].reg] = MIN2(def[inst->src[i].reg], eip); 2510 use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip); 2511 } 2512 } 2513 if (inst->dst.file == GRF && inst->dst.reg != 0) { 2514 def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip); 2515 use[inst->dst.reg] = MAX2(use[inst->dst.reg], eip); 2516 } 2517 } 2518 2519 ip++; 2520 } 2521 2522 this->virtual_grf_def = def; 2523 this->virtual_grf_use = use; 2524} 2525 2526bool 2527fs_visitor::virtual_grf_interferes(int a, int b) 2528{ 2529 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); 2530 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); 2531 2532 return start <= end; 2533} 2534 2535static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) 2536{ 2537 struct brw_reg brw_reg; 2538 2539 switch (reg->file) { 2540 case GRF: 2541 case ARF: 2542 case MRF: 2543 brw_reg = brw_vec8_reg(reg->file, 
2544 reg->hw_reg, 0); 2545 brw_reg = retype(brw_reg, reg->type); 2546 break; 2547 case IMM: 2548 switch (reg->type) { 2549 case BRW_REGISTER_TYPE_F: 2550 brw_reg = brw_imm_f(reg->imm.f); 2551 break; 2552 case BRW_REGISTER_TYPE_D: 2553 brw_reg = brw_imm_d(reg->imm.i); 2554 break; 2555 case BRW_REGISTER_TYPE_UD: 2556 brw_reg = brw_imm_ud(reg->imm.u); 2557 break; 2558 default: 2559 assert(!"not reached"); 2560 break; 2561 } 2562 break; 2563 case FIXED_HW_REG: 2564 brw_reg = reg->fixed_hw_reg; 2565 break; 2566 case BAD_FILE: 2567 /* Probably unused. */ 2568 brw_reg = brw_null_reg(); 2569 break; 2570 case UNIFORM: 2571 assert(!"not reached"); 2572 brw_reg = brw_null_reg(); 2573 break; 2574 } 2575 if (reg->abs) 2576 brw_reg = brw_abs(brw_reg); 2577 if (reg->negate) 2578 brw_reg = negate(brw_reg); 2579 2580 return brw_reg; 2581} 2582 2583void 2584fs_visitor::generate_code() 2585{ 2586 unsigned int annotation_len = 0; 2587 int last_native_inst = 0; 2588 struct brw_instruction *if_stack[16], *loop_stack[16]; 2589 int if_stack_depth = 0, loop_stack_depth = 0; 2590 int if_depth_in_loop[16]; 2591 2592 if_depth_in_loop[loop_stack_depth] = 0; 2593 2594 memset(&if_stack, 0, sizeof(if_stack)); 2595 foreach_iter(exec_list_iterator, iter, this->instructions) { 2596 fs_inst *inst = (fs_inst *)iter.get(); 2597 struct brw_reg src[3], dst; 2598 2599 for (unsigned int i = 0; i < 3; i++) { 2600 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 2601 } 2602 dst = brw_reg_from_fs_reg(&inst->dst); 2603 2604 brw_set_conditionalmod(p, inst->conditional_mod); 2605 brw_set_predicate_control(p, inst->predicated); 2606 2607 switch (inst->opcode) { 2608 case BRW_OPCODE_MOV: 2609 brw_MOV(p, dst, src[0]); 2610 break; 2611 case BRW_OPCODE_ADD: 2612 brw_ADD(p, dst, src[0], src[1]); 2613 break; 2614 case BRW_OPCODE_MUL: 2615 brw_MUL(p, dst, src[0], src[1]); 2616 break; 2617 2618 case BRW_OPCODE_FRC: 2619 brw_FRC(p, dst, src[0]); 2620 break; 2621 case BRW_OPCODE_RNDD: 2622 brw_RNDD(p, dst, src[0]); 2623 break; 
2624 case BRW_OPCODE_RNDZ: 2625 brw_RNDZ(p, dst, src[0]); 2626 break; 2627 2628 case BRW_OPCODE_AND: 2629 brw_AND(p, dst, src[0], src[1]); 2630 break; 2631 case BRW_OPCODE_OR: 2632 brw_OR(p, dst, src[0], src[1]); 2633 break; 2634 case BRW_OPCODE_XOR: 2635 brw_XOR(p, dst, src[0], src[1]); 2636 break; 2637 2638 case BRW_OPCODE_CMP: 2639 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 2640 break; 2641 case BRW_OPCODE_SEL: 2642 brw_SEL(p, dst, src[0], src[1]); 2643 break; 2644 2645 case BRW_OPCODE_IF: 2646 assert(if_stack_depth < 16); 2647 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 2648 if_depth_in_loop[loop_stack_depth]++; 2649 if_stack_depth++; 2650 break; 2651 case BRW_OPCODE_ELSE: 2652 if_stack[if_stack_depth - 1] = 2653 brw_ELSE(p, if_stack[if_stack_depth - 1]); 2654 break; 2655 case BRW_OPCODE_ENDIF: 2656 if_stack_depth--; 2657 brw_ENDIF(p , if_stack[if_stack_depth]); 2658 if_depth_in_loop[loop_stack_depth]--; 2659 break; 2660 2661 case BRW_OPCODE_DO: 2662 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 2663 if_depth_in_loop[loop_stack_depth] = 0; 2664 break; 2665 2666 case BRW_OPCODE_BREAK: 2667 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 2668 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2669 break; 2670 case BRW_OPCODE_CONTINUE: 2671 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 2672 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2673 break; 2674 2675 case BRW_OPCODE_WHILE: { 2676 struct brw_instruction *inst0, *inst1; 2677 GLuint br = 1; 2678 2679 if (intel->gen >= 5) 2680 br = 2; 2681 2682 assert(loop_stack_depth > 0); 2683 loop_stack_depth--; 2684 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 2685 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 2686 while (inst0 > loop_stack[loop_stack_depth]) { 2687 inst0--; 2688 if (inst0->header.opcode == BRW_OPCODE_BREAK && 2689 inst0->bits3.if_else.jump_count == 0) { 2690 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 2691 } 2692 else 
if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 2693 inst0->bits3.if_else.jump_count == 0) { 2694 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 2695 } 2696 } 2697 } 2698 break; 2699 2700 case FS_OPCODE_RCP: 2701 case FS_OPCODE_RSQ: 2702 case FS_OPCODE_SQRT: 2703 case FS_OPCODE_EXP2: 2704 case FS_OPCODE_LOG2: 2705 case FS_OPCODE_POW: 2706 case FS_OPCODE_SIN: 2707 case FS_OPCODE_COS: 2708 generate_math(inst, dst, src); 2709 break; 2710 case FS_OPCODE_LINTERP: 2711 generate_linterp(inst, dst, src); 2712 break; 2713 case FS_OPCODE_TEX: 2714 case FS_OPCODE_TXB: 2715 case FS_OPCODE_TXL: 2716 generate_tex(inst, dst, src[0]); 2717 break; 2718 case FS_OPCODE_DISCARD: 2719 generate_discard(inst, dst /* src0 == dst */); 2720 break; 2721 case FS_OPCODE_DDX: 2722 generate_ddx(inst, dst, src[0]); 2723 break; 2724 case FS_OPCODE_DDY: 2725 generate_ddy(inst, dst, src[0]); 2726 break; 2727 case FS_OPCODE_FB_WRITE: 2728 generate_fb_write(inst); 2729 break; 2730 default: 2731 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 2732 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 2733 brw_opcodes[inst->opcode].name); 2734 } else { 2735 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 2736 } 2737 this->fail = true; 2738 } 2739 2740 if (annotation_len < p->nr_insn) { 2741 annotation_len *= 2; 2742 if (annotation_len < 16) 2743 annotation_len = 16; 2744 2745 this->annotation_string = talloc_realloc(this->mem_ctx, 2746 annotation_string, 2747 const char *, 2748 annotation_len); 2749 this->annotation_ir = talloc_realloc(this->mem_ctx, 2750 annotation_ir, 2751 ir_instruction *, 2752 annotation_len); 2753 } 2754 2755 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 2756 this->annotation_string[i] = inst->annotation; 2757 this->annotation_ir[i] = inst->ir; 2758 } 2759 last_native_inst = p->nr_insn; 2760 } 2761} 2762 2763GLboolean 2764brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 2765{ 2766 struct brw_compile *p = &c->func; 2767 
struct intel_context *intel = &brw->intel; 2768 GLcontext *ctx = &intel->ctx; 2769 struct brw_shader *shader = NULL; 2770 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 2771 2772 if (!prog) 2773 return GL_FALSE; 2774 2775 if (!using_new_fs) 2776 return GL_FALSE; 2777 2778 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 2779 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 2780 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 2781 break; 2782 } 2783 } 2784 if (!shader) 2785 return GL_FALSE; 2786 2787 /* We always use 8-wide mode, at least for now. For one, flow 2788 * control only works in 8-wide. Also, when we're fragment shader 2789 * bound, we're almost always under register pressure as well, so 2790 * 8-wide would save us from the performance cliff of spilling 2791 * regs. 2792 */ 2793 c->dispatch_width = 8; 2794 2795 if (INTEL_DEBUG & DEBUG_WM) { 2796 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 2797 _mesa_print_ir(shader->ir, NULL); 2798 printf("\n"); 2799 } 2800 2801 /* Now the main event: Visit the shader IR and generate our FS IR for it. 2802 */ 2803 fs_visitor v(c, shader); 2804 2805 if (0) { 2806 v.emit_dummy_fs(); 2807 } else { 2808 v.calculate_urb_setup(); 2809 if (intel->gen < 6) 2810 v.emit_interpolation_setup_gen4(); 2811 else 2812 v.emit_interpolation_setup_gen6(); 2813 2814 /* Generate FS IR for main(). (the visitor only descends into 2815 * functions called "main"). 2816 */ 2817 foreach_iter(exec_list_iterator, iter, *shader->ir) { 2818 ir_instruction *ir = (ir_instruction *)iter.get(); 2819 v.base_ir = ir; 2820 ir->accept(&v); 2821 } 2822 2823 v.emit_fb_writes(); 2824 v.assign_curb_setup(); 2825 v.assign_urb_setup(); 2826 if (0) 2827 v.assign_regs_trivial(); 2828 else 2829 v.assign_regs(); 2830 } 2831 2832 if (!v.fail) 2833 v.generate_code(); 2834 2835 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. 
*/ 2836 2837 if (v.fail) 2838 return GL_FALSE; 2839 2840 if (INTEL_DEBUG & DEBUG_WM) { 2841 const char *last_annotation_string = NULL; 2842 ir_instruction *last_annotation_ir = NULL; 2843 2844 printf("Native code for fragment shader %d:\n", prog->Name); 2845 for (unsigned int i = 0; i < p->nr_insn; i++) { 2846 if (last_annotation_ir != v.annotation_ir[i]) { 2847 last_annotation_ir = v.annotation_ir[i]; 2848 if (last_annotation_ir) { 2849 printf(" "); 2850 last_annotation_ir->print(); 2851 printf("\n"); 2852 } 2853 } 2854 if (last_annotation_string != v.annotation_string[i]) { 2855 last_annotation_string = v.annotation_string[i]; 2856 if (last_annotation_string) 2857 printf(" %s\n", last_annotation_string); 2858 } 2859 brw_disasm(stdout, &p->store[i], intel->gen); 2860 } 2861 printf("\n"); 2862 } 2863 2864 c->prog_data.total_grf = v.grf_used; 2865 c->prog_data.total_scratch = 0; 2866 2867 return GL_TRUE; 2868} 2869