brw_fs.cpp revision 2999a44968a045b5516ff23d70b711b01bd696a5
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "program/prog_parameter.h" 35#include "program/prog_print.h" 36#include "program/prog_optimize.h" 37#include "program/sampler.h" 38#include "program/hash_table.h" 39#include "brw_context.h" 40#include "brw_eu.h" 41#include "brw_wm.h" 42#include "talloc.h" 43} 44#include "../glsl/glsl_types.h" 45#include "../glsl/ir_optimization.h" 46#include "../glsl/ir_print_visitor.h" 47 48enum register_file { 49 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 50 GRF = BRW_GENERAL_REGISTER_FILE, 51 MRF = BRW_MESSAGE_REGISTER_FILE, 52 IMM = BRW_IMMEDIATE_VALUE, 53 FIXED_HW_REG, /* a struct brw_reg */ 54 UNIFORM, /* prog_data->params[hw_reg] */ 55 BAD_FILE 56}; 57 58enum fs_opcodes { 59 FS_OPCODE_FB_WRITE = 256, 60 FS_OPCODE_RCP, 61 FS_OPCODE_RSQ, 62 FS_OPCODE_SQRT, 63 FS_OPCODE_EXP2, 64 FS_OPCODE_LOG2, 65 FS_OPCODE_POW, 66 FS_OPCODE_SIN, 67 FS_OPCODE_COS, 68 FS_OPCODE_DDX, 69 FS_OPCODE_DDY, 70 FS_OPCODE_LINTERP, 71 FS_OPCODE_TEX, 72 FS_OPCODE_TXB, 73 FS_OPCODE_TXL, 74 FS_OPCODE_DISCARD, 75}; 76 77static int using_new_fs = -1; 78 79struct gl_shader * 80brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) 81{ 82 struct brw_shader *shader; 83 84 shader = talloc_zero(NULL, struct brw_shader); 85 if (shader) { 86 shader->base.Type = type; 87 shader->base.Name = name; 88 _mesa_init_shader(ctx, &shader->base); 89 } 90 91 return &shader->base; 92} 93 94struct gl_shader_program * 95brw_new_shader_program(GLcontext *ctx, GLuint name) 96{ 97 struct brw_shader_program *prog; 98 prog = talloc_zero(NULL, struct brw_shader_program); 99 if (prog) { 100 prog->base.Name = name; 101 _mesa_init_shader_program(ctx, &prog->base); 102 } 103 return &prog->base; 104} 105 106GLboolean 107brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) 108{ 109 if (!_mesa_ir_compile_shader(ctx, shader)) 110 return GL_FALSE; 111 112 return 
GL_TRUE; 113} 114 115GLboolean 116brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) 117{ 118 if (using_new_fs == -1) 119 using_new_fs = getenv("INTEL_NEW_FS") != NULL; 120 121 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { 122 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; 123 124 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) { 125 void *mem_ctx = talloc_new(NULL); 126 bool progress; 127 128 if (shader->ir) 129 talloc_free(shader->ir); 130 shader->ir = new(shader) exec_list; 131 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 132 133 do_mat_op_to_vec(shader->ir); 134 do_mod_to_fract(shader->ir); 135 do_div_to_mul_rcp(shader->ir); 136 do_sub_to_add_neg(shader->ir); 137 do_explog_to_explog2(shader->ir); 138 139 do { 140 progress = false; 141 142 brw_do_channel_expressions(shader->ir); 143 brw_do_vector_splitting(shader->ir); 144 145 progress = do_lower_jumps(shader->ir, true, true, 146 true, /* main return */ 147 false, /* continue */ 148 false /* loops */ 149 ) || progress; 150 151 progress = do_common_optimization(shader->ir, true, 32) || progress; 152 153 progress = lower_noise(shader->ir) || progress; 154 progress = 155 lower_variable_index_to_cond_assign(shader->ir, 156 GL_TRUE, /* input */ 157 GL_TRUE, /* output */ 158 GL_TRUE, /* temp */ 159 GL_TRUE /* uniform */ 160 ) || progress; 161 } while (progress); 162 163 validate_ir_tree(shader->ir); 164 165 reparent_ir(shader->ir, shader->ir); 166 talloc_free(mem_ctx); 167 } 168 } 169 170 if (!_mesa_ir_link_shader(ctx, prog)) 171 return GL_FALSE; 172 173 return GL_TRUE; 174} 175 176static int 177type_size(const struct glsl_type *type) 178{ 179 unsigned int size, i; 180 181 switch (type->base_type) { 182 case GLSL_TYPE_UINT: 183 case GLSL_TYPE_INT: 184 case GLSL_TYPE_FLOAT: 185 case GLSL_TYPE_BOOL: 186 return type->components(); 187 case GLSL_TYPE_ARRAY: 188 /* FINISHME: uniform/varying arrays. 
*/ 189 return type_size(type->fields.array) * type->length; 190 case GLSL_TYPE_STRUCT: 191 size = 0; 192 for (i = 0; i < type->length; i++) { 193 size += type_size(type->fields.structure[i].type); 194 } 195 return size; 196 case GLSL_TYPE_SAMPLER: 197 /* Samplers take up no register space, since they're baked in at 198 * link time. 199 */ 200 return 0; 201 default: 202 assert(!"not reached"); 203 return 0; 204 } 205} 206 207class fs_reg { 208public: 209 /* Callers of this talloc-based new need not call delete. It's 210 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 211 static void* operator new(size_t size, void *ctx) 212 { 213 void *node; 214 215 node = talloc_size(ctx, size); 216 assert(node != NULL); 217 218 return node; 219 } 220 221 void init() 222 { 223 this->reg = 0; 224 this->reg_offset = 0; 225 this->negate = 0; 226 this->abs = 0; 227 this->hw_reg = -1; 228 } 229 230 /** Generic unset register constructor. */ 231 fs_reg() 232 { 233 init(); 234 this->file = BAD_FILE; 235 } 236 237 /** Immediate value constructor. */ 238 fs_reg(float f) 239 { 240 init(); 241 this->file = IMM; 242 this->type = BRW_REGISTER_TYPE_F; 243 this->imm.f = f; 244 } 245 246 /** Immediate value constructor. */ 247 fs_reg(int32_t i) 248 { 249 init(); 250 this->file = IMM; 251 this->type = BRW_REGISTER_TYPE_D; 252 this->imm.i = i; 253 } 254 255 /** Immediate value constructor. */ 256 fs_reg(uint32_t u) 257 { 258 init(); 259 this->file = IMM; 260 this->type = BRW_REGISTER_TYPE_UD; 261 this->imm.u = u; 262 } 263 264 /** Fixed brw_reg Immediate value constructor. */ 265 fs_reg(struct brw_reg fixed_hw_reg) 266 { 267 init(); 268 this->file = FIXED_HW_REG; 269 this->fixed_hw_reg = fixed_hw_reg; 270 this->type = fixed_hw_reg.type; 271 } 272 273 fs_reg(enum register_file file, int hw_reg); 274 fs_reg(class fs_visitor *v, const struct glsl_type *type); 275 276 /** Register file: ARF, GRF, MRF, IMM. */ 277 enum register_file file; 278 /** Abstract register number. 
0 = fixed hw reg */ 279 int reg; 280 /** Offset within the abstract register. */ 281 int reg_offset; 282 /** HW register number. Generally unset until register allocation. */ 283 int hw_reg; 284 /** Register type. BRW_REGISTER_TYPE_* */ 285 int type; 286 bool negate; 287 bool abs; 288 struct brw_reg fixed_hw_reg; 289 290 /** Value for file == BRW_IMMMEDIATE_FILE */ 291 union { 292 int32_t i; 293 uint32_t u; 294 float f; 295 } imm; 296}; 297 298static const fs_reg reg_undef; 299static const fs_reg reg_null(ARF, BRW_ARF_NULL); 300 301class fs_inst : public exec_node { 302public: 303 /* Callers of this talloc-based new need not call delete. It's 304 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 305 static void* operator new(size_t size, void *ctx) 306 { 307 void *node; 308 309 node = talloc_zero_size(ctx, size); 310 assert(node != NULL); 311 312 return node; 313 } 314 315 void init() 316 { 317 this->opcode = BRW_OPCODE_NOP; 318 this->saturate = false; 319 this->conditional_mod = BRW_CONDITIONAL_NONE; 320 this->predicated = false; 321 this->sampler = 0; 322 this->shadow_compare = false; 323 } 324 325 fs_inst() 326 { 327 init(); 328 } 329 330 fs_inst(int opcode) 331 { 332 init(); 333 this->opcode = opcode; 334 } 335 336 fs_inst(int opcode, fs_reg dst, fs_reg src0) 337 { 338 init(); 339 this->opcode = opcode; 340 this->dst = dst; 341 this->src[0] = src0; 342 } 343 344 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) 345 { 346 init(); 347 this->opcode = opcode; 348 this->dst = dst; 349 this->src[0] = src0; 350 this->src[1] = src1; 351 } 352 353 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 354 { 355 init(); 356 this->opcode = opcode; 357 this->dst = dst; 358 this->src[0] = src0; 359 this->src[1] = src1; 360 this->src[2] = src2; 361 } 362 363 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 364 fs_reg dst; 365 fs_reg src[3]; 366 bool saturate; 367 bool predicated; 368 int conditional_mod; /**< BRW_CONDITIONAL_* */ 369 370 
int mlen; /** SEND message length */ 371 int sampler; 372 bool shadow_compare; 373 374 /** @{ 375 * Annotation for the generated IR. One of the two can be set. 376 */ 377 ir_instruction *ir; 378 const char *annotation; 379 /** @} */ 380}; 381 382class fs_visitor : public ir_visitor 383{ 384public: 385 386 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) 387 { 388 this->c = c; 389 this->p = &c->func; 390 this->brw = p->brw; 391 this->fp = brw->fragment_program; 392 this->intel = &brw->intel; 393 this->ctx = &intel->ctx; 394 this->mem_ctx = talloc_new(NULL); 395 this->shader = shader; 396 this->fail = false; 397 this->next_abstract_grf = 1; 398 this->variable_ht = hash_table_ctor(0, 399 hash_table_pointer_hash, 400 hash_table_pointer_compare); 401 402 this->frag_color = NULL; 403 this->frag_data = NULL; 404 this->frag_depth = NULL; 405 this->first_non_payload_grf = 0; 406 407 this->current_annotation = NULL; 408 this->annotation_string = NULL; 409 this->annotation_ir = NULL; 410 this->base_ir = NULL; 411 } 412 ~fs_visitor() 413 { 414 talloc_free(this->mem_ctx); 415 hash_table_dtor(this->variable_ht); 416 } 417 418 fs_reg *variable_storage(ir_variable *var); 419 420 void visit(ir_variable *ir); 421 void visit(ir_assignment *ir); 422 void visit(ir_dereference_variable *ir); 423 void visit(ir_dereference_record *ir); 424 void visit(ir_dereference_array *ir); 425 void visit(ir_expression *ir); 426 void visit(ir_texture *ir); 427 void visit(ir_if *ir); 428 void visit(ir_constant *ir); 429 void visit(ir_swizzle *ir); 430 void visit(ir_return *ir); 431 void visit(ir_loop *ir); 432 void visit(ir_loop_jump *ir); 433 void visit(ir_discard *ir); 434 void visit(ir_call *ir); 435 void visit(ir_function *ir); 436 void visit(ir_function_signature *ir); 437 438 fs_inst *emit(fs_inst inst); 439 void assign_curb_setup(); 440 void assign_urb_setup(); 441 void assign_regs(); 442 void generate_code(); 443 void generate_fb_write(fs_inst *inst); 444 void 
generate_linterp(fs_inst *inst, struct brw_reg dst, 445 struct brw_reg *src); 446 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 447 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); 448 void generate_discard(fs_inst *inst); 449 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 450 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 451 452 void emit_dummy_fs(); 453 void emit_interpolation(); 454 void emit_pinterp(int location); 455 void emit_fb_writes(); 456 457 struct brw_reg interp_reg(int location, int channel); 458 int setup_uniform_values(int loc, const glsl_type *type); 459 460 struct brw_context *brw; 461 const struct gl_fragment_program *fp; 462 struct intel_context *intel; 463 GLcontext *ctx; 464 struct brw_wm_compile *c; 465 struct brw_compile *p; 466 struct brw_shader *shader; 467 void *mem_ctx; 468 exec_list instructions; 469 int next_abstract_grf; 470 struct hash_table *variable_ht; 471 ir_variable *frag_color, *frag_data, *frag_depth; 472 int first_non_payload_grf; 473 474 /** @{ debug annotation info */ 475 const char *current_annotation; 476 ir_instruction *base_ir; 477 const char **annotation_string; 478 ir_instruction **annotation_ir; 479 /** @} */ 480 481 bool fail; 482 483 /* Result of last visit() method. */ 484 fs_reg result; 485 486 fs_reg pixel_x; 487 fs_reg pixel_y; 488 fs_reg pixel_w; 489 fs_reg delta_x; 490 fs_reg delta_y; 491 fs_reg interp_attrs[64]; 492 493 int grf_used; 494 495}; 496 497/** Fixed HW reg constructor. 
*/ 498fs_reg::fs_reg(enum register_file file, int hw_reg) 499{ 500 init(); 501 this->file = file; 502 this->hw_reg = hw_reg; 503 this->type = BRW_REGISTER_TYPE_F; 504} 505 506int 507brw_type_for_base_type(const struct glsl_type *type) 508{ 509 switch (type->base_type) { 510 case GLSL_TYPE_FLOAT: 511 return BRW_REGISTER_TYPE_F; 512 case GLSL_TYPE_INT: 513 case GLSL_TYPE_BOOL: 514 return BRW_REGISTER_TYPE_D; 515 case GLSL_TYPE_UINT: 516 return BRW_REGISTER_TYPE_UD; 517 case GLSL_TYPE_ARRAY: 518 case GLSL_TYPE_STRUCT: 519 /* These should be overridden with the type of the member when 520 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 521 * way to trip up if we don't. 522 */ 523 return BRW_REGISTER_TYPE_UD; 524 default: 525 assert(!"not reached"); 526 return BRW_REGISTER_TYPE_F; 527 } 528} 529 530/** Automatic reg constructor. */ 531fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 532{ 533 init(); 534 535 this->file = GRF; 536 this->reg = v->next_abstract_grf; 537 this->reg_offset = 0; 538 v->next_abstract_grf += type_size(type); 539 this->type = brw_type_for_base_type(type); 540} 541 542fs_reg * 543fs_visitor::variable_storage(ir_variable *var) 544{ 545 return (fs_reg *)hash_table_find(this->variable_ht, var); 546} 547 548/* Our support for uniforms is piggy-backed on the struct 549 * gl_fragment_program, because that's where the values actually 550 * get stored, rather than in some global gl_shader_program uniform 551 * store. 
552 */ 553int 554fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 555{ 556 unsigned int offset = 0; 557 float *vec_values; 558 559 if (type->is_matrix()) { 560 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 561 type->vector_elements, 562 1); 563 564 for (unsigned int i = 0; i < type->matrix_columns; i++) { 565 offset += setup_uniform_values(loc + offset, column); 566 } 567 568 return offset; 569 } 570 571 switch (type->base_type) { 572 case GLSL_TYPE_FLOAT: 573 case GLSL_TYPE_UINT: 574 case GLSL_TYPE_INT: 575 case GLSL_TYPE_BOOL: 576 vec_values = fp->Base.Parameters->ParameterValues[loc]; 577 for (unsigned int i = 0; i < type->vector_elements; i++) { 578 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 579 } 580 return 1; 581 582 case GLSL_TYPE_STRUCT: 583 for (unsigned int i = 0; i < type->length; i++) { 584 offset += setup_uniform_values(loc + offset, 585 type->fields.structure[i].type); 586 } 587 return offset; 588 589 case GLSL_TYPE_ARRAY: 590 for (unsigned int i = 0; i < type->length; i++) { 591 offset += setup_uniform_values(loc + offset, type->fields.array); 592 } 593 return offset; 594 595 case GLSL_TYPE_SAMPLER: 596 /* The sampler takes up a slot, but we don't use any values from it. 
*/ 597 return 1; 598 599 default: 600 assert(!"not reached"); 601 return 0; 602 } 603} 604 605void 606fs_visitor::visit(ir_variable *ir) 607{ 608 fs_reg *reg = NULL; 609 610 if (strcmp(ir->name, "gl_FragColor") == 0) { 611 this->frag_color = ir; 612 } else if (strcmp(ir->name, "gl_FragData") == 0) { 613 this->frag_data = ir; 614 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 615 this->frag_depth = ir; 616 assert(!"FINISHME: this hangs currently."); 617 } 618 619 if (ir->mode == ir_var_in) { 620 if (strcmp(ir->name, "gl_FrontFacing") == 0) { 621 reg = new(this->mem_ctx) fs_reg(this, ir->type); 622 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 623 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 624 * us front face 625 */ 626 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 627 *reg, 628 fs_reg(r1_6ud), 629 fs_reg(1u << 31))); 630 inst->conditional_mod = BRW_CONDITIONAL_L; 631 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 632 } else { 633 reg = &this->interp_attrs[ir->location]; 634 } 635 } 636 637 if (ir->mode == ir_var_uniform) { 638 int param_index = c->prog_data.nr_params; 639 640 setup_uniform_values(ir->location, ir->type); 641 642 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 643 } 644 645 if (!reg) 646 reg = new(this->mem_ctx) fs_reg(this, ir->type); 647 648 hash_table_insert(this->variable_ht, reg, ir); 649} 650 651void 652fs_visitor::visit(ir_dereference_variable *ir) 653{ 654 fs_reg *reg = variable_storage(ir->var); 655 this->result = *reg; 656} 657 658void 659fs_visitor::visit(ir_dereference_record *ir) 660{ 661 const glsl_type *struct_type = ir->record->type; 662 663 ir->record->accept(this); 664 665 unsigned int offset = 0; 666 for (unsigned int i = 0; i < struct_type->length; i++) { 667 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 668 break; 669 offset += type_size(struct_type->fields.structure[i].type); 670 } 671 this->result.reg_offset += offset; 672 
this->result.type = brw_type_for_base_type(ir->type); 673} 674 675void 676fs_visitor::visit(ir_dereference_array *ir) 677{ 678 ir_constant *index; 679 int element_size; 680 681 ir->array->accept(this); 682 index = ir->array_index->as_constant(); 683 684 if (ir->type->is_matrix()) { 685 element_size = ir->type->vector_elements; 686 } else { 687 element_size = type_size(ir->type); 688 this->result.type = brw_type_for_base_type(ir->type); 689 } 690 691 if (index) { 692 assert(this->result.file == UNIFORM || 693 (this->result.file == GRF && 694 this->result.reg != 0)); 695 this->result.reg_offset += index->value.i[0] * element_size; 696 } else { 697 assert(!"FINISHME: non-constant matrix column"); 698 } 699} 700 701void 702fs_visitor::visit(ir_expression *ir) 703{ 704 unsigned int operand; 705 fs_reg op[2], temp; 706 fs_reg result; 707 fs_inst *inst; 708 709 for (operand = 0; operand < ir->get_num_operands(); operand++) { 710 ir->operands[operand]->accept(this); 711 if (this->result.file == BAD_FILE) { 712 ir_print_visitor v; 713 printf("Failed to get tree for expression operand:\n"); 714 ir->operands[operand]->accept(&v); 715 this->fail = true; 716 } 717 op[operand] = this->result; 718 719 /* Matrix expression operands should have been broken down to vector 720 * operations already. 721 */ 722 assert(!ir->operands[operand]->type->is_matrix()); 723 /* And then those vector operands should have been broken down to scalar. 724 */ 725 assert(!ir->operands[operand]->type->is_vector()); 726 } 727 728 /* Storage for our result. If our result goes into an assignment, it will 729 * just get copy-propagated out, so no worries. 
730 */ 731 this->result = fs_reg(this, ir->type); 732 733 switch (ir->operation) { 734 case ir_unop_logic_not: 735 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 736 break; 737 case ir_unop_neg: 738 op[0].negate = !op[0].negate; 739 this->result = op[0]; 740 break; 741 case ir_unop_abs: 742 op[0].abs = true; 743 this->result = op[0]; 744 break; 745 case ir_unop_sign: 746 temp = fs_reg(this, ir->type); 747 748 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 749 750 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 751 inst->conditional_mod = BRW_CONDITIONAL_G; 752 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 753 inst->predicated = true; 754 755 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 756 inst->conditional_mod = BRW_CONDITIONAL_L; 757 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 758 inst->predicated = true; 759 760 break; 761 case ir_unop_rcp: 762 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0])); 763 break; 764 765 case ir_unop_exp2: 766 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0])); 767 break; 768 case ir_unop_log2: 769 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0])); 770 break; 771 case ir_unop_exp: 772 case ir_unop_log: 773 assert(!"not reached: should be handled by ir_explog_to_explog2"); 774 break; 775 case ir_unop_sin: 776 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0])); 777 break; 778 case ir_unop_cos: 779 emit(fs_inst(FS_OPCODE_COS, this->result, op[0])); 780 break; 781 782 case ir_unop_dFdx: 783 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 784 break; 785 case ir_unop_dFdy: 786 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 787 break; 788 789 case ir_binop_add: 790 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 791 break; 792 case ir_binop_sub: 793 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 794 break; 795 796 case ir_binop_mul: 797 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 
798 break; 799 case ir_binop_div: 800 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 801 break; 802 case ir_binop_mod: 803 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 804 break; 805 806 case ir_binop_less: 807 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 808 inst->conditional_mod = BRW_CONDITIONAL_L; 809 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 810 break; 811 case ir_binop_greater: 812 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 813 inst->conditional_mod = BRW_CONDITIONAL_G; 814 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 815 break; 816 case ir_binop_lequal: 817 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 818 inst->conditional_mod = BRW_CONDITIONAL_LE; 819 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 820 break; 821 case ir_binop_gequal: 822 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 823 inst->conditional_mod = BRW_CONDITIONAL_GE; 824 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 825 break; 826 case ir_binop_equal: 827 case ir_binop_all_equal: /* same as nequal for scalars */ 828 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 829 inst->conditional_mod = BRW_CONDITIONAL_Z; 830 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 831 break; 832 case ir_binop_nequal: 833 case ir_binop_any_nequal: /* same as nequal for scalars */ 834 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 835 inst->conditional_mod = BRW_CONDITIONAL_NZ; 836 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 837 break; 838 839 case ir_binop_logic_xor: 840 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 841 break; 842 843 case ir_binop_logic_or: 844 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 845 break; 846 847 case ir_binop_logic_and: 848 
emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 849 break; 850 851 case ir_binop_dot: 852 case ir_binop_cross: 853 case ir_unop_any: 854 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 855 break; 856 857 case ir_unop_noise: 858 assert(!"not reached: should be handled by lower_noise"); 859 break; 860 861 case ir_unop_sqrt: 862 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0])); 863 break; 864 865 case ir_unop_rsq: 866 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0])); 867 break; 868 869 case ir_unop_i2f: 870 case ir_unop_b2f: 871 case ir_unop_b2i: 872 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 873 break; 874 case ir_unop_f2i: 875 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 876 break; 877 case ir_unop_f2b: 878 case ir_unop_i2b: 879 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 880 inst->conditional_mod = BRW_CONDITIONAL_NZ; 881 882 case ir_unop_trunc: 883 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 884 break; 885 case ir_unop_ceil: 886 op[0].negate = ~op[0].negate; 887 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 888 this->result.negate = true; 889 break; 890 case ir_unop_floor: 891 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 892 break; 893 case ir_unop_fract: 894 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 895 break; 896 897 case ir_binop_min: 898 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 899 inst->conditional_mod = BRW_CONDITIONAL_L; 900 901 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 902 inst->predicated = true; 903 break; 904 case ir_binop_max: 905 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 906 inst->conditional_mod = BRW_CONDITIONAL_G; 907 908 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 909 inst->predicated = true; 910 break; 911 912 case ir_binop_pow: 913 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1])); 914 break; 915 916 
case ir_unop_bit_not: 917 case ir_unop_u2f: 918 case ir_binop_lshift: 919 case ir_binop_rshift: 920 case ir_binop_bit_and: 921 case ir_binop_bit_xor: 922 case ir_binop_bit_or: 923 assert(!"GLSL 1.30 features unsupported"); 924 break; 925 } 926} 927 928void 929fs_visitor::visit(ir_assignment *ir) 930{ 931 struct fs_reg l, r; 932 int i; 933 int write_mask; 934 fs_inst *inst; 935 936 /* FINISHME: arrays on the lhs */ 937 ir->lhs->accept(this); 938 l = this->result; 939 940 ir->rhs->accept(this); 941 r = this->result; 942 943 /* FINISHME: This should really set to the correct maximal writemask for each 944 * FINISHME: component written (in the loops below). This case can only 945 * FINISHME: occur for matrices, arrays, and structures. 946 */ 947 if (ir->write_mask == 0) { 948 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 949 write_mask = WRITEMASK_XYZW; 950 } else { 951 assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar()); 952 write_mask = ir->write_mask; 953 } 954 955 assert(l.file != BAD_FILE); 956 assert(r.file != BAD_FILE); 957 958 if (ir->condition) { 959 /* Get the condition bool into the predicate. 
*/ 960 ir->condition->accept(this); 961 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0))); 962 inst->conditional_mod = BRW_CONDITIONAL_NZ; 963 } 964 965 for (i = 0; i < type_size(ir->lhs->type); i++) { 966 if (i >= 4 || (write_mask & (1 << i))) { 967 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 968 if (ir->condition) 969 inst->predicated = true; 970 r.reg_offset++; 971 } 972 l.reg_offset++; 973 } 974} 975 976void 977fs_visitor::visit(ir_texture *ir) 978{ 979 int base_mrf = 2; 980 fs_inst *inst = NULL; 981 unsigned int mlen = 0; 982 983 ir->coordinate->accept(this); 984 fs_reg coordinate = this->result; 985 986 if (ir->projector) { 987 fs_reg inv_proj = fs_reg(this, glsl_type::float_type); 988 989 ir->projector->accept(this); 990 emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result)); 991 992 fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type); 993 for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) { 994 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj)); 995 coordinate.reg_offset++; 996 proj_coordinate.reg_offset++; 997 } 998 proj_coordinate.reg_offset = 0; 999 1000 coordinate = proj_coordinate; 1001 } 1002 1003 for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { 1004 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate)); 1005 coordinate.reg_offset++; 1006 } 1007 1008 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ 1009 if (intel->gen < 5) 1010 mlen = 3; 1011 1012 if (ir->shadow_comparitor) { 1013 /* For shadow comparisons, we have to supply u,v,r. */ 1014 mlen = 3; 1015 1016 ir->shadow_comparitor->accept(this); 1017 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1018 mlen++; 1019 } 1020 1021 /* Do we ever want to handle writemasking on texture samples? Is it 1022 * performance relevant? 
1023 */ 1024 fs_reg dst = fs_reg(this, glsl_type::vec4_type); 1025 1026 switch (ir->op) { 1027 case ir_tex: 1028 inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf))); 1029 break; 1030 case ir_txb: 1031 ir->lod_info.bias->accept(this); 1032 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1033 mlen++; 1034 1035 inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf))); 1036 break; 1037 case ir_txl: 1038 ir->lod_info.lod->accept(this); 1039 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 1040 mlen++; 1041 1042 inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf))); 1043 break; 1044 case ir_txd: 1045 case ir_txf: 1046 assert(!"GLSL 1.30 features unsupported"); 1047 break; 1048 } 1049 1050 inst->sampler = 1051 _mesa_get_sampler_uniform_value(ir->sampler, 1052 ctx->Shader.CurrentProgram, 1053 &brw->fragment_program->Base); 1054 inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler]; 1055 1056 this->result = dst; 1057 1058 if (ir->shadow_comparitor) 1059 inst->shadow_compare = true; 1060 inst->mlen = mlen; 1061} 1062 1063void 1064fs_visitor::visit(ir_swizzle *ir) 1065{ 1066 ir->val->accept(this); 1067 fs_reg val = this->result; 1068 1069 fs_reg result = fs_reg(this, ir->type); 1070 this->result = result; 1071 1072 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1073 fs_reg channel = val; 1074 int swiz = 0; 1075 1076 switch (i) { 1077 case 0: 1078 swiz = ir->mask.x; 1079 break; 1080 case 1: 1081 swiz = ir->mask.y; 1082 break; 1083 case 2: 1084 swiz = ir->mask.z; 1085 break; 1086 case 3: 1087 swiz = ir->mask.w; 1088 break; 1089 } 1090 1091 channel.reg_offset += swiz; 1092 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 1093 result.reg_offset++; 1094 } 1095} 1096 1097void 1098fs_visitor::visit(ir_discard *ir) 1099{ 1100 assert(ir->condition == NULL); /* FINISHME */ 1101 1102 emit(fs_inst(FS_OPCODE_DISCARD)); 1103} 1104 1105void 1106fs_visitor::visit(ir_constant *ir) 1107{ 
1108 fs_reg reg(this, ir->type); 1109 this->result = reg; 1110 1111 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1112 switch (ir->type->base_type) { 1113 case GLSL_TYPE_FLOAT: 1114 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); 1115 break; 1116 case GLSL_TYPE_UINT: 1117 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); 1118 break; 1119 case GLSL_TYPE_INT: 1120 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); 1121 break; 1122 case GLSL_TYPE_BOOL: 1123 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); 1124 break; 1125 default: 1126 assert(!"Non-float/uint/int/bool constant"); 1127 } 1128 reg.reg_offset++; 1129 } 1130} 1131 1132void 1133fs_visitor::visit(ir_if *ir) 1134{ 1135 fs_inst *inst; 1136 1137 /* Don't point the annotation at the if statement, because then it plus 1138 * the then and else blocks get printed. 1139 */ 1140 this->base_ir = ir->condition; 1141 1142 /* Generate the condition into the condition code. */ 1143 ir->condition->accept(this); 1144 inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result)); 1145 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1146 1147 inst = emit(fs_inst(BRW_OPCODE_IF)); 1148 inst->predicated = true; 1149 1150 foreach_iter(exec_list_iterator, iter, ir->then_instructions) { 1151 ir_instruction *ir = (ir_instruction *)iter.get(); 1152 this->base_ir = ir; 1153 1154 ir->accept(this); 1155 } 1156 1157 if (!ir->else_instructions.is_empty()) { 1158 emit(fs_inst(BRW_OPCODE_ELSE)); 1159 1160 foreach_iter(exec_list_iterator, iter, ir->else_instructions) { 1161 ir_instruction *ir = (ir_instruction *)iter.get(); 1162 this->base_ir = ir; 1163 1164 ir->accept(this); 1165 } 1166 } 1167 1168 emit(fs_inst(BRW_OPCODE_ENDIF)); 1169} 1170 1171void 1172fs_visitor::visit(ir_loop *ir) 1173{ 1174 assert(!ir->from); 1175 assert(!ir->to); 1176 assert(!ir->increment); 1177 assert(!ir->counter); 1178 1179 emit(fs_inst(BRW_OPCODE_DO)); 1180 1181 /* Start a safety counter. 
If the user messed up their loop
    * counting, we don't want to hang the GPU.
    */
   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      fs_inst *inst;

      this->base_ir = ir;
      ir->accept(this);

      /* Check the maximum loop iters counter. */
      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
      inst->conditional_mod = BRW_CONDITIONAL_Z;

      /* Break out of the loop once the counter reaches zero. */
      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

/* break/continue map directly to the hardware loop-control opcodes. */
void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

/* Calls should all have been inlined before this backend runs. */
void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      /* main() takes no parameters, so match against an empty list. */
      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   /* Signatures are walked through ir_function above, never directly. */
   assert(!"not reached");
   (void)ir;
}

/* Append a copy of inst to the instruction stream, tagging it with the
 * current annotation string and source IR for later debug printing.
 * Returns the list-owned copy so callers can set flags on it.
 */
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color.  Color payload lives in m2..m5 (RGBA). */
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 2),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 3),
                fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 4),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 5),
                fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
                        fs_reg(0),
                        fs_reg(0)));
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   /* Two setup channels per register: even channels in the low half,
    * odd channels 4 floats in.
    */
   int regnr = location * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs.
 */
void
fs_visitor::emit_interpolation()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   /* For now, the source regs for the setup URB data will be unset,
    * since we don't know until codegen how many push constants we'll
    * use, and therefore what the setup URB offset is.
    */
   fs_reg src_reg = reg_undef;

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* g1 holds the subspan origin coordinates; the brw_imm_v vectors add
    * the per-pixel offsets within each 2x2 subspan.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   this->delta_x = fs_reg(this, glsl_type::float_type);
   this->delta_y = fs_reg(this, glsl_type::float_type);
   /* Deltas are pixel position minus the start position in g1.0/g1.1;
    * these feed every LINTERP below.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.  Unlike many other varying inputs, we usually need it
    * to produce 1/w, and the varying variable wouldn't show up.
    */
   fs_reg wpos = fs_reg(this, glsl_type::vec4_type);
   this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos;
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */
   wpos.reg_offset++;
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */
   wpos.reg_offset++;
   /* z and w come from the interpolated setup data. */
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 2)));
   wpos.reg_offset++;
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));

   /* Interpolate every input variable the shader declares. */
   foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      ir_variable *var = ir->as_variable();

      if (!var)
         continue;

      if (var->mode != ir_var_in)
         continue;

      /* If it's already set up (WPOS), skip.
       */
      if (var->location == 0)
         continue;

      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "interpolate %s "
                                                 "(FRAG_ATTRIB[%d])",
                                                 var->name,
                                                 var->location);
      emit_pinterp(var->location);
   }
   this->current_annotation = NULL;
}

/* Perspective-correct interpolation of one vec4 attribute: LINTERP each
 * of the four channels, then multiply each by 1/w (this->pixel_w).
 */
void
fs_visitor::emit_pinterp(int location)
{
   fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
   this->interp_attrs[location] = interp_attr;

   for (unsigned int i = 0; i < 4; i++) {
      struct brw_reg interp = interp_reg(location, i);
      emit(fs_inst(FS_OPCODE_LINTERP,
                   interp_attr,
                   this->delta_x,
                   this->delta_y,
                   fs_reg(interp)));
      interp_attr.reg_offset++;
   }
   /* Rewind to channel 0 and apply the perspective divide. */
   interp_attr.reg_offset -= 4;

   for (unsigned int i = 0; i < 4; i++) {
      emit(fs_inst(BRW_OPCODE_MUL,
                   interp_attr,
                   interp_attr,
                   this->pixel_w));
      interp_attr.reg_offset++;
   }
}

/* Copy the shader's output color into the FB write payload (m2..m5). */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write";

   assert(this->frag_color || !"FINISHME: MRT");
   fs_reg color = *(variable_storage(this->frag_color));

   for (int i = 0; i < 4; i++) {
      emit(fs_inst(BRW_OPCODE_MOV,
                   fs_reg(MRF, 2 + i),
                   color));
      color.reg_offset++;
   }

   emit(fs_inst(FS_OPCODE_FB_WRITE,
                fs_reg(0),
                fs_reg(0)));

   this->current_annotation = NULL;
}

void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = 1; /* FINISHME: MRT */
   /* FINISHME: AADS */

   /* Header is 2 regs, g0 and g1 are the contents.  g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
           brw_message_reg(1),
           brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   /* 2 header regs plus the 4 color regs written by emit_fb_writes(). */
   int nr = 2 + 4;

   brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                0, /* base MRF */
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
                0, /* FINISHME: MRT target */
                nr,
                0,
                eot);
}

/* Emit linear interpolation: a single PLN when the hardware has it and
 * the delta regs are an adjacent, properly aligned pair; otherwise the
 * two-instruction LINE+MAC sequence.
 */
void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

/* Map a math opcode to the shared math unit's function encoding and emit
 * the math message.
 */
void
fs_visitor::generate_math(fs_inst *inst,
                          struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   /* POW's second operand goes in message reg 3, after src[0] in m2. */
   if (inst->opcode == FS_OPCODE_POW) {
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   brw_math(p, dst,
            op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE : 1531 BRW_MATH_SATURATE_NONE, 1532 2, src[0], 1533 BRW_MATH_DATA_VECTOR, 1534 BRW_MATH_PRECISION_FULL); 1535} 1536 1537void 1538fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1539{ 1540 int msg_type = -1; 1541 int rlen = 4; 1542 1543 if (intel->gen == 5) { 1544 switch (inst->opcode) { 1545 case FS_OPCODE_TEX: 1546 if (inst->shadow_compare) { 1547 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; 1548 } else { 1549 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; 1550 } 1551 break; 1552 case FS_OPCODE_TXB: 1553 if (inst->shadow_compare) { 1554 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; 1555 } else { 1556 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; 1557 } 1558 break; 1559 } 1560 } else { 1561 switch (inst->opcode) { 1562 case FS_OPCODE_TEX: 1563 /* Note that G45 and older determines shadow compare and dispatch width 1564 * from message length for most messages. 1565 */ 1566 if (inst->shadow_compare) { 1567 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; 1568 } else { 1569 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; 1570 } 1571 case FS_OPCODE_TXB: 1572 if (inst->shadow_compare) { 1573 assert(!"FINISHME: shadow compare with bias."); 1574 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1575 } else { 1576 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1577 rlen = 8; 1578 } 1579 break; 1580 } 1581 } 1582 assert(msg_type != -1); 1583 1584 /* g0 header. 
*/ 1585 src.nr--; 1586 1587 brw_SAMPLE(p, 1588 retype(dst, BRW_REGISTER_TYPE_UW), 1589 src.nr, 1590 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 1591 SURF_INDEX_TEXTURE(inst->sampler), 1592 inst->sampler, 1593 WRITEMASK_XYZW, 1594 msg_type, 1595 rlen, 1596 inst->mlen + 1, 1597 0, 1598 1, 1599 BRW_SAMPLER_SIMD_MODE_SIMD8); 1600} 1601 1602 1603/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 1604 * looking like: 1605 * 1606 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 1607 * 1608 * and we're trying to produce: 1609 * 1610 * DDX DDY 1611 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 1612 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 1613 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 1614 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 1615 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 1616 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 1617 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 1618 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 1619 * 1620 * and add another set of two more subspans if in 16-pixel dispatch mode. 1621 * 1622 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 1623 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 1624 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 1625 * between each other. 
We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* src0 reads the .tr elements (suboffset 1), src1 the .tl elements;
    * width 2 / horiz stride 0 repeats each difference across the pair.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Approximation described in the comment above: tl (suboffset 0) minus
    * bl (suboffset 2), replicated 4 wide per subspan.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/* Kill the current channels by ANDing the inverse of the execution mask
 * into the g0 pixel-enable bits.
 */
void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, c->emit_mask_reg, g0);
   brw_pop_insn_state(p);
}

/* Map a virtual GRF (reg >= 1) to a hardware reg just past the payload;
 * reg == 0 means the register is already fixed or unused.
 */
static void
trivial_assign_reg(int header_size, fs_reg *reg)
{
   if (reg->file == GRF && reg->reg != 0) {
      reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
      reg->reg = 0;
   }
}

/* Lay out the push-constant (CURB) space and rewrite UNIFORM-file sources
 * to the fixed hardware registers the constants will land in.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   c->prog_data.curb_read_length =
      ALIGN(c->prog_data.nr_params, 8) / 8;

   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
                           c->prog_data.curb_read_length) & 1) {
      /* Align the start of the interpolation coefficients so that we can use
       * the PLN instruction.
       */
      c->prog_data.first_curbe_grf++;
   }

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
            /* Eight constants per GRF; hw_reg holds the param index here. */
            int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
            struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
                                                  constant_nr / 8,
                                                  constant_nr % 8);

            inst->src[i].file = FIXED_HW_REG;
            inst->src[i].fixed_hw_reg = brw_reg;
         }
      }
   }
}

/* Assign registers for the URB setup data (the per-attribute plane
 * equations) and record how much of it the thread reads.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      /* WPOS is always set up (see emit_interpolation()); others only if
       * the program reads them.
       */
      if (i != FRAG_ATTRIB_WPOS &&
          !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
         continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
         continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* interp_reg() packed two attribute regs per location: nr/2 recovers
       * the attribute index, nr&1 the register within the attribute.
       */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
                                      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}

/* Assign hardware GRFs to all virtual registers and record the total
 * GRF count for thread setup.
 */
void
fs_visitor::assign_regs()
{
   int header_size = this->first_non_payload_grf;
   int last_grf = 0;

   /* FINISHME: trivial assignment of register numbers */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      /* NOTE(review): src[2] is never run through trivial_assign_reg() --
       * this appears to rely on LINTERP's src[2] already being FIXED_HW_REG
       * (see assign_urb_setup); confirm no opcode uses a GRF src[2].
       */
      trivial_assign_reg(header_size, &inst->dst);
      trivial_assign_reg(header_size, &inst->src[0]);
      trivial_assign_reg(header_size, &inst->src[1]);

      last_grf = MAX2(last_grf, inst->dst.hw_reg);
      last_grf = MAX2(last_grf, inst->src[0].hw_reg);
      last_grf = MAX2(last_grf, inst->src[1].hw_reg);
   }

   this->grf_used = last_grf + 1;
}

/* Convert an fs_reg into the struct brw_reg the brw_eu emitter expects. */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      brw_reg = brw_vec8_reg(reg->file,
                             reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(reg->imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(reg->imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(reg->imm.u);
         break;
      default:
         assert(!"not reached");
         break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused.
*/ 1813 brw_reg = brw_null_reg(); 1814 break; 1815 case UNIFORM: 1816 assert(!"not reached"); 1817 brw_reg = brw_null_reg(); 1818 break; 1819 } 1820 if (reg->abs) 1821 brw_reg = brw_abs(brw_reg); 1822 if (reg->negate) 1823 brw_reg = negate(brw_reg); 1824 1825 return brw_reg; 1826} 1827 1828void 1829fs_visitor::generate_code() 1830{ 1831 unsigned int annotation_len = 0; 1832 int last_native_inst = 0; 1833 struct brw_instruction *if_stack[16], *loop_stack[16]; 1834 int if_stack_depth = 0, loop_stack_depth = 0; 1835 int if_depth_in_loop[16]; 1836 1837 if_depth_in_loop[loop_stack_depth] = 0; 1838 1839 memset(&if_stack, 0, sizeof(if_stack)); 1840 foreach_iter(exec_list_iterator, iter, this->instructions) { 1841 fs_inst *inst = (fs_inst *)iter.get(); 1842 struct brw_reg src[3], dst; 1843 1844 for (unsigned int i = 0; i < 3; i++) { 1845 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 1846 } 1847 dst = brw_reg_from_fs_reg(&inst->dst); 1848 1849 brw_set_conditionalmod(p, inst->conditional_mod); 1850 brw_set_predicate_control(p, inst->predicated); 1851 1852 switch (inst->opcode) { 1853 case BRW_OPCODE_MOV: 1854 brw_MOV(p, dst, src[0]); 1855 break; 1856 case BRW_OPCODE_ADD: 1857 brw_ADD(p, dst, src[0], src[1]); 1858 break; 1859 case BRW_OPCODE_MUL: 1860 brw_MUL(p, dst, src[0], src[1]); 1861 break; 1862 1863 case BRW_OPCODE_FRC: 1864 brw_FRC(p, dst, src[0]); 1865 break; 1866 case BRW_OPCODE_RNDD: 1867 brw_RNDD(p, dst, src[0]); 1868 break; 1869 case BRW_OPCODE_RNDZ: 1870 brw_RNDZ(p, dst, src[0]); 1871 break; 1872 1873 case BRW_OPCODE_AND: 1874 brw_AND(p, dst, src[0], src[1]); 1875 break; 1876 case BRW_OPCODE_OR: 1877 brw_OR(p, dst, src[0], src[1]); 1878 break; 1879 case BRW_OPCODE_XOR: 1880 brw_XOR(p, dst, src[0], src[1]); 1881 break; 1882 1883 case BRW_OPCODE_CMP: 1884 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 1885 break; 1886 case BRW_OPCODE_SEL: 1887 brw_SEL(p, dst, src[0], src[1]); 1888 break; 1889 1890 case BRW_OPCODE_IF: 1891 assert(if_stack_depth < 16); 1892 
if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 1893 if_depth_in_loop[loop_stack_depth]++; 1894 if_stack_depth++; 1895 break; 1896 case BRW_OPCODE_ELSE: 1897 if_stack[if_stack_depth - 1] = 1898 brw_ELSE(p, if_stack[if_stack_depth - 1]); 1899 break; 1900 case BRW_OPCODE_ENDIF: 1901 if_stack_depth--; 1902 brw_ENDIF(p , if_stack[if_stack_depth]); 1903 if_depth_in_loop[loop_stack_depth]--; 1904 break; 1905 1906 case BRW_OPCODE_DO: 1907 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 1908 if_depth_in_loop[loop_stack_depth] = 0; 1909 break; 1910 1911 case BRW_OPCODE_BREAK: 1912 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 1913 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1914 break; 1915 case BRW_OPCODE_CONTINUE: 1916 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 1917 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1918 break; 1919 1920 case BRW_OPCODE_WHILE: { 1921 struct brw_instruction *inst0, *inst1; 1922 GLuint br = 1; 1923 1924 if (intel->gen == 5) 1925 br = 2; 1926 1927 assert(loop_stack_depth > 0); 1928 loop_stack_depth--; 1929 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 1930 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 1931 while (inst0 > loop_stack[loop_stack_depth]) { 1932 inst0--; 1933 if (inst0->header.opcode == BRW_OPCODE_BREAK && 1934 inst0->bits3.if_else.jump_count == 0) { 1935 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 1936 } 1937 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 1938 inst0->bits3.if_else.jump_count == 0) { 1939 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 1940 } 1941 } 1942 } 1943 break; 1944 1945 case FS_OPCODE_RCP: 1946 case FS_OPCODE_RSQ: 1947 case FS_OPCODE_SQRT: 1948 case FS_OPCODE_EXP2: 1949 case FS_OPCODE_LOG2: 1950 case FS_OPCODE_POW: 1951 case FS_OPCODE_SIN: 1952 case FS_OPCODE_COS: 1953 generate_math(inst, dst, src); 1954 break; 1955 case FS_OPCODE_LINTERP: 1956 generate_linterp(inst, dst, src); 1957 break; 1958 case 
FS_OPCODE_TEX: 1959 case FS_OPCODE_TXB: 1960 case FS_OPCODE_TXL: 1961 generate_tex(inst, dst, src[0]); 1962 break; 1963 case FS_OPCODE_DISCARD: 1964 generate_discard(inst); 1965 break; 1966 case FS_OPCODE_DDX: 1967 generate_ddx(inst, dst, src[0]); 1968 break; 1969 case FS_OPCODE_DDY: 1970 generate_ddy(inst, dst, src[0]); 1971 break; 1972 case FS_OPCODE_FB_WRITE: 1973 generate_fb_write(inst); 1974 break; 1975 default: 1976 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 1977 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 1978 brw_opcodes[inst->opcode].name); 1979 } else { 1980 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 1981 } 1982 this->fail = true; 1983 } 1984 1985 if (annotation_len < p->nr_insn) { 1986 annotation_len *= 2; 1987 if (annotation_len < 16) 1988 annotation_len = 16; 1989 1990 this->annotation_string = talloc_realloc(this->mem_ctx, 1991 annotation_string, 1992 const char *, 1993 annotation_len); 1994 this->annotation_ir = talloc_realloc(this->mem_ctx, 1995 annotation_ir, 1996 ir_instruction *, 1997 annotation_len); 1998 } 1999 2000 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 2001 this->annotation_string[i] = inst->annotation; 2002 this->annotation_ir[i] = inst->ir; 2003 } 2004 last_native_inst = p->nr_insn; 2005 } 2006} 2007 2008GLboolean 2009brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 2010{ 2011 struct brw_compile *p = &c->func; 2012 struct intel_context *intel = &brw->intel; 2013 GLcontext *ctx = &intel->ctx; 2014 struct brw_shader *shader = NULL; 2015 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 2016 2017 if (!prog) 2018 return GL_FALSE; 2019 2020 if (!using_new_fs) 2021 return GL_FALSE; 2022 2023 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 2024 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 2025 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 2026 break; 2027 } 2028 } 2029 if (!shader) 2030 return GL_FALSE; 2031 2032 /* We 
always use 8-wide mode, at least for now.  For one, flow
    * control only works in 8-wide.  Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      v.emit_dummy_fs();
   } else {
      v.emit_interpolation();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         v.base_ir = ir;
         ir->accept(&v);
      }

      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();
      v.assign_regs();
   }

   v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc.
                     */

   if (v.fail)
      return GL_FALSE;

   if (INTEL_DEBUG & DEBUG_WM) {
      /* Disassemble the native code, printing the recorded IR/annotation
       * whenever it changes from one instruction to the next.
       */
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
         if (last_annotation_ir != v.annotation_ir[i]) {
            last_annotation_ir = v.annotation_ir[i];
            if (last_annotation_ir) {
               printf(" ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != v.annotation_string[i]) {
            last_annotation_string = v.annotation_string[i];
            if (last_annotation_string)
               printf(" %s\n", last_annotation_string);
         }
         brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   /* Report register usage back for thread setup. */
   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}