/* brw_fs.cpp revision 166b3fa29d4b5af8d4e8c410ed71e4348b65bbd9 */
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31/* Evil hack for using libdrm in a c++ compiler. 
*/ 32#define virtual virt 33#include "i915_drm.h" 34#include "intel_bufmgr.h" 35#undef virtual 36 37#include "main/macros.h" 38#include "main/shaderobj.h" 39#include "program/prog_parameter.h" 40#include "program/prog_print.h" 41#include "program/prog_optimize.h" 42#include "program/hash_table.h" 43#include "brw_context.h" 44#include "brw_eu.h" 45#include "brw_wm.h" 46#include "talloc.h" 47} 48#include "../glsl/glsl_types.h" 49#include "../glsl/ir_optimization.h" 50#include "../glsl/ir_print_visitor.h" 51 52enum register_file { 53 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 54 GRF = BRW_GENERAL_REGISTER_FILE, 55 MRF = BRW_MESSAGE_REGISTER_FILE, 56 IMM = BRW_IMMEDIATE_VALUE, 57 FIXED_HW_REG, /* a struct brw_reg */ 58 UNIFORM, /* prog_data->params[hw_reg] */ 59 BAD_FILE 60}; 61 62enum fs_opcodes { 63 FS_OPCODE_FB_WRITE = 256, 64 FS_OPCODE_RCP, 65 FS_OPCODE_RSQ, 66 FS_OPCODE_SQRT, 67 FS_OPCODE_EXP2, 68 FS_OPCODE_LOG2, 69 FS_OPCODE_POW, 70 FS_OPCODE_SIN, 71 FS_OPCODE_COS, 72 FS_OPCODE_DDX, 73 FS_OPCODE_DDY, 74 FS_OPCODE_LINTERP, 75}; 76 77static int using_new_fs = -1; 78 79struct gl_shader * 80brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) 81{ 82 struct brw_shader *shader; 83 84 shader = talloc_zero(NULL, struct brw_shader); 85 if (shader) { 86 shader->base.Type = type; 87 shader->base.Name = name; 88 _mesa_init_shader(ctx, &shader->base); 89 } 90 91 return &shader->base; 92} 93 94struct gl_shader_program * 95brw_new_shader_program(GLcontext *ctx, GLuint name) 96{ 97 struct brw_shader_program *prog; 98 prog = talloc_zero(NULL, struct brw_shader_program); 99 if (prog) { 100 prog->base.Name = name; 101 _mesa_init_shader_program(ctx, &prog->base); 102 } 103 return &prog->base; 104} 105 106GLboolean 107brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) 108{ 109 if (!_mesa_ir_compile_shader(ctx, shader)) 110 return GL_FALSE; 111 112 return GL_TRUE; 113} 114 115GLboolean 116brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) 117{ 118 if 
(using_new_fs == -1) 119 using_new_fs = getenv("INTEL_NEW_FS") != NULL; 120 121 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { 122 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; 123 124 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) { 125 void *mem_ctx = talloc_new(NULL); 126 bool progress; 127 128 if (shader->ir) 129 talloc_free(shader->ir); 130 shader->ir = new(shader) exec_list; 131 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 132 133 do_mat_op_to_vec(shader->ir); 134 do_div_to_mul_rcp(shader->ir); 135 do_sub_to_add_neg(shader->ir); 136 do_explog_to_explog2(shader->ir); 137 138 brw_do_channel_expressions(shader->ir); 139 brw_do_vector_splitting(shader->ir); 140 141 do { 142 progress = false; 143 144 progress = do_common_optimization(shader->ir, true) || progress; 145 } while (progress); 146 147 validate_ir_tree(shader->ir); 148 149 reparent_ir(shader->ir, shader->ir); 150 talloc_free(mem_ctx); 151 } 152 } 153 154 if (!_mesa_ir_link_shader(ctx, prog)) 155 return GL_FALSE; 156 157 return GL_TRUE; 158} 159 160static int 161type_size(const struct glsl_type *type) 162{ 163 unsigned int size, i; 164 165 switch (type->base_type) { 166 case GLSL_TYPE_UINT: 167 case GLSL_TYPE_INT: 168 case GLSL_TYPE_FLOAT: 169 case GLSL_TYPE_BOOL: 170 return type->components(); 171 case GLSL_TYPE_ARRAY: 172 /* FINISHME: uniform/varying arrays. */ 173 return type_size(type->fields.array) * type->length; 174 case GLSL_TYPE_STRUCT: 175 size = 0; 176 for (i = 0; i < type->length; i++) { 177 size += type_size(type->fields.structure[i].type); 178 } 179 return size; 180 case GLSL_TYPE_SAMPLER: 181 /* Samplers take up no register space, since they're baked in at 182 * link time. 183 */ 184 return 0; 185 default: 186 assert(!"not reached"); 187 return 0; 188 } 189} 190 191class fs_reg { 192public: 193 /* Callers of this talloc-based new need not call delete. It's 194 * easier to just talloc_free 'ctx' (or any of its ancestors). 
*/ 195 static void* operator new(size_t size, void *ctx) 196 { 197 void *node; 198 199 node = talloc_size(ctx, size); 200 assert(node != NULL); 201 202 return node; 203 } 204 205 /** Generic unset register constructor. */ 206 fs_reg() 207 { 208 this->file = BAD_FILE; 209 this->reg = 0; 210 this->reg_offset = 0; 211 this->hw_reg = -1; 212 this->negate = 0; 213 this->abs = 0; 214 } 215 216 /** Immediate value constructor. */ 217 fs_reg(float f) 218 { 219 this->file = IMM; 220 this->reg = 0; 221 this->hw_reg = 0; 222 this->type = BRW_REGISTER_TYPE_F; 223 this->imm.f = f; 224 this->negate = 0; 225 this->abs = 0; 226 } 227 228 /** Immediate value constructor. */ 229 fs_reg(int32_t i) 230 { 231 this->file = IMM; 232 this->reg = 0; 233 this->hw_reg = 0; 234 this->type = BRW_REGISTER_TYPE_D; 235 this->imm.i = i; 236 this->negate = 0; 237 this->abs = 0; 238 } 239 240 /** Immediate value constructor. */ 241 fs_reg(uint32_t u) 242 { 243 this->file = IMM; 244 this->reg = 0; 245 this->hw_reg = 0; 246 this->type = BRW_REGISTER_TYPE_UD; 247 this->imm.u = u; 248 this->negate = 0; 249 this->abs = 0; 250 } 251 252 /** Fixed brw_reg Immediate value constructor. */ 253 fs_reg(struct brw_reg fixed_hw_reg) 254 { 255 this->file = FIXED_HW_REG; 256 this->fixed_hw_reg = fixed_hw_reg; 257 this->reg = 0; 258 this->hw_reg = 0; 259 this->type = fixed_hw_reg.type; 260 this->negate = 0; 261 this->abs = 0; 262 } 263 264 fs_reg(enum register_file file, int hw_reg); 265 fs_reg(class fs_visitor *v, const struct glsl_type *type); 266 267 /** Register file: ARF, GRF, MRF, IMM. */ 268 enum register_file file; 269 /** Abstract register number. 0 = fixed hw reg */ 270 int reg; 271 /** Offset within the abstract register. */ 272 int reg_offset; 273 /** HW register number. Generally unset until register allocation. */ 274 int hw_reg; 275 /** Register type. 
BRW_REGISTER_TYPE_* */ 276 int type; 277 bool negate; 278 bool abs; 279 struct brw_reg fixed_hw_reg; 280 281 /** Value for file == BRW_IMMMEDIATE_FILE */ 282 union { 283 int32_t i; 284 uint32_t u; 285 float f; 286 } imm; 287}; 288 289static const fs_reg reg_undef; 290static const fs_reg reg_null(ARF, BRW_ARF_NULL); 291 292class fs_inst : public exec_node { 293public: 294 /* Callers of this talloc-based new need not call delete. It's 295 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 296 static void* operator new(size_t size, void *ctx) 297 { 298 void *node; 299 300 node = talloc_zero_size(ctx, size); 301 assert(node != NULL); 302 303 return node; 304 } 305 306 fs_inst() 307 { 308 this->opcode = BRW_OPCODE_NOP; 309 this->saturate = false; 310 this->conditional_mod = BRW_CONDITIONAL_NONE; 311 this->predicated = false; 312 } 313 314 fs_inst(int opcode, fs_reg dst, fs_reg src0) 315 { 316 this->opcode = opcode; 317 this->dst = dst; 318 this->src[0] = src0; 319 this->saturate = false; 320 this->conditional_mod = BRW_CONDITIONAL_NONE; 321 this->predicated = false; 322 } 323 324 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) 325 { 326 this->opcode = opcode; 327 this->dst = dst; 328 this->src[0] = src0; 329 this->src[1] = src1; 330 this->saturate = false; 331 this->conditional_mod = BRW_CONDITIONAL_NONE; 332 this->predicated = false; 333 } 334 335 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 336 { 337 this->opcode = opcode; 338 this->dst = dst; 339 this->src[0] = src0; 340 this->src[1] = src1; 341 this->src[2] = src2; 342 this->saturate = false; 343 this->conditional_mod = BRW_CONDITIONAL_NONE; 344 this->predicated = false; 345 } 346 347 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 348 fs_reg dst; 349 fs_reg src[3]; 350 bool saturate; 351 bool predicated; 352 int conditional_mod; /**< BRW_CONDITIONAL_* */ 353 354 /** @{ 355 * Annotation for the generated IR. One of the two can be set. 
356 */ 357 ir_instruction *ir; 358 const char *annotation; 359 /** @} */ 360}; 361 362class fs_visitor : public ir_visitor 363{ 364public: 365 366 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) 367 { 368 this->c = c; 369 this->p = &c->func; 370 this->brw = p->brw; 371 this->intel = &brw->intel; 372 this->mem_ctx = talloc_new(NULL); 373 this->shader = shader; 374 this->fail = false; 375 this->next_abstract_grf = 1; 376 this->variable_ht = hash_table_ctor(0, 377 hash_table_pointer_hash, 378 hash_table_pointer_compare); 379 380 this->frag_color = NULL; 381 this->frag_data = NULL; 382 this->frag_depth = NULL; 383 this->first_non_payload_grf = 0; 384 385 this->current_annotation = NULL; 386 this->annotation_string = NULL; 387 this->annotation_ir = NULL; 388 } 389 ~fs_visitor() 390 { 391 talloc_free(this->mem_ctx); 392 hash_table_dtor(this->variable_ht); 393 } 394 395 fs_reg *variable_storage(ir_variable *var); 396 397 void visit(ir_variable *ir); 398 void visit(ir_assignment *ir); 399 void visit(ir_dereference_variable *ir); 400 void visit(ir_dereference_record *ir); 401 void visit(ir_dereference_array *ir); 402 void visit(ir_expression *ir); 403 void visit(ir_texture *ir); 404 void visit(ir_if *ir); 405 void visit(ir_constant *ir); 406 void visit(ir_swizzle *ir); 407 void visit(ir_return *ir); 408 void visit(ir_loop *ir); 409 void visit(ir_loop_jump *ir); 410 void visit(ir_discard *ir); 411 void visit(ir_call *ir); 412 void visit(ir_function *ir); 413 void visit(ir_function_signature *ir); 414 415 fs_inst *emit(fs_inst inst); 416 void assign_curb_setup(); 417 void assign_urb_setup(); 418 void assign_regs(); 419 void generate_code(); 420 void generate_fb_write(fs_inst *inst); 421 void generate_linterp(fs_inst *inst, struct brw_reg dst, 422 struct brw_reg *src); 423 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); 424 425 void emit_dummy_fs(); 426 void emit_interpolation(); 427 void emit_pinterp(int location); 428 void 
emit_fb_writes(); 429 430 struct brw_reg interp_reg(int location, int channel); 431 432 struct brw_context *brw; 433 struct intel_context *intel; 434 struct brw_wm_compile *c; 435 struct brw_compile *p; 436 struct brw_shader *shader; 437 void *mem_ctx; 438 exec_list instructions; 439 int next_abstract_grf; 440 struct hash_table *variable_ht; 441 ir_variable *frag_color, *frag_data, *frag_depth; 442 int first_non_payload_grf; 443 444 /** @{ debug annotation info */ 445 const char *current_annotation; 446 ir_instruction *base_ir; 447 const char **annotation_string; 448 ir_instruction **annotation_ir; 449 /** @} */ 450 451 bool fail; 452 453 /* Result of last visit() method. */ 454 fs_reg result; 455 456 fs_reg pixel_x; 457 fs_reg pixel_y; 458 fs_reg pixel_w; 459 fs_reg delta_x; 460 fs_reg delta_y; 461 fs_reg interp_attrs[64]; 462 463 int grf_used; 464 465}; 466 467/** Fixed HW reg constructor. */ 468fs_reg::fs_reg(enum register_file file, int hw_reg) 469{ 470 this->file = file; 471 this->reg = 0; 472 this->reg_offset = 0; 473 this->hw_reg = hw_reg; 474 this->type = BRW_REGISTER_TYPE_F; 475 this->negate = 0; 476 this->abs = 0; 477} 478 479/** Automatic reg constructor. 
*/ 480fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 481{ 482 this->file = GRF; 483 this->reg = v->next_abstract_grf; 484 this->reg_offset = 0; 485 v->next_abstract_grf += type_size(type); 486 this->hw_reg = -1; 487 this->negate = 0; 488 this->abs = 0; 489 490 switch (type->base_type) { 491 case GLSL_TYPE_FLOAT: 492 this->type = BRW_REGISTER_TYPE_F; 493 break; 494 case GLSL_TYPE_INT: 495 case GLSL_TYPE_BOOL: 496 this->type = BRW_REGISTER_TYPE_D; 497 break; 498 case GLSL_TYPE_UINT: 499 this->type = BRW_REGISTER_TYPE_UD; 500 break; 501 default: 502 assert(!"not reached"); 503 this->type = BRW_REGISTER_TYPE_F; 504 break; 505 } 506} 507 508fs_reg * 509fs_visitor::variable_storage(ir_variable *var) 510{ 511 return (fs_reg *)hash_table_find(this->variable_ht, var); 512} 513 514void 515fs_visitor::visit(ir_variable *ir) 516{ 517 fs_reg *reg = NULL; 518 519 if (strcmp(ir->name, "gl_FragColor") == 0) { 520 this->frag_color = ir; 521 } else if (strcmp(ir->name, "gl_FragData") == 0) { 522 this->frag_data = ir; 523 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 524 this->frag_depth = ir; 525 assert(!"FINISHME: this hangs currently."); 526 } 527 528 if (ir->mode == ir_var_in) { 529 reg = &this->interp_attrs[ir->location]; 530 } 531 532 if (ir->mode == ir_var_uniform) { 533 const float *vec_values; 534 int param_index = c->prog_data.nr_params; 535 536 /* FINISHME: This is wildly incomplete. */ 537 assert(ir->type->is_scalar() || ir->type->is_vector()); 538 539 const struct gl_program *fp = &this->brw->fragment_program->Base; 540 /* Our support for uniforms is piggy-backed on the struct 541 * gl_fragment_program, because that's where the values actually 542 * get stored, rather than in some global gl_shader_program uniform 543 * store. 
544 */ 545 vec_values = fp->Parameters->ParameterValues[ir->location]; 546 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 547 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 548 } 549 550 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 551 } 552 553 if (!reg) 554 reg = new(this->mem_ctx) fs_reg(this, ir->type); 555 556 hash_table_insert(this->variable_ht, reg, ir); 557} 558 559void 560fs_visitor::visit(ir_dereference_variable *ir) 561{ 562 fs_reg *reg = variable_storage(ir->var); 563 this->result = *reg; 564} 565 566void 567fs_visitor::visit(ir_dereference_record *ir) 568{ 569 assert(!"FINISHME"); 570} 571 572void 573fs_visitor::visit(ir_dereference_array *ir) 574{ 575 ir_constant *index; 576 int element_size; 577 578 ir->array->accept(this); 579 index = ir->array_index->as_constant(); 580 581 if (ir->type->is_matrix()) { 582 element_size = ir->type->vector_elements; 583 } else { 584 element_size = type_size(ir->type); 585 } 586 587 if (index) { 588 assert(this->result.file == UNIFORM || 589 (this->result.file == GRF && 590 this->result.reg != 0)); 591 this->result.reg_offset += index->value.i[0] * element_size; 592 } else { 593 assert(!"FINISHME: non-constant matrix column"); 594 } 595} 596 597void 598fs_visitor::visit(ir_expression *ir) 599{ 600 unsigned int operand; 601 fs_reg op[2], temp; 602 fs_reg result; 603 fs_inst *inst; 604 605 for (operand = 0; operand < ir->get_num_operands(); operand++) { 606 ir->operands[operand]->accept(this); 607 if (this->result.file == BAD_FILE) { 608 ir_print_visitor v; 609 printf("Failed to get tree for expression operand:\n"); 610 ir->operands[operand]->accept(&v); 611 this->fail = true; 612 } 613 op[operand] = this->result; 614 615 /* Matrix expression operands should have been broken down to vector 616 * operations already. 617 */ 618 assert(!ir->operands[operand]->type->is_matrix()); 619 /* And then those vector operands should have been broken down to scalar. 
620 */ 621 assert(!ir->operands[operand]->type->is_vector()); 622 } 623 624 /* Storage for our result. If our result goes into an assignment, it will 625 * just get copy-propagated out, so no worries. 626 */ 627 this->result = fs_reg(this, ir->type); 628 629 switch (ir->operation) { 630 case ir_unop_logic_not: 631 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 632 break; 633 case ir_unop_neg: 634 this->result = op[0]; 635 op[0].negate = ~op[0].negate; 636 break; 637 case ir_unop_abs: 638 this->result = op[0]; 639 op[0].abs = true; 640 break; 641 case ir_unop_sign: 642 temp = fs_reg(this, ir->type); 643 644 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 645 inst->conditional_mod = BRW_CONDITIONAL_G; 646 647 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f))); 648 inst->conditional_mod = BRW_CONDITIONAL_L; 649 650 temp.negate = true; 651 emit(fs_inst(BRW_OPCODE_ADD, this->result, this->result, temp)); 652 653 break; 654 case ir_unop_rcp: 655 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0])); 656 break; 657 658 case ir_unop_exp2: 659 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0])); 660 break; 661 case ir_unop_log2: 662 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0])); 663 break; 664 case ir_unop_exp: 665 case ir_unop_log: 666 assert(!"not reached: should be handled by ir_explog_to_explog2"); 667 break; 668 case ir_unop_sin: 669 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0])); 670 break; 671 case ir_unop_cos: 672 emit(fs_inst(FS_OPCODE_COS, this->result, op[0])); 673 break; 674 675 case ir_unop_dFdx: 676 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 677 break; 678 case ir_unop_dFdy: 679 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 680 break; 681 682 case ir_binop_add: 683 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 684 break; 685 case ir_binop_sub: 686 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 687 break; 688 689 case ir_binop_mul: 690 
emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 691 break; 692 case ir_binop_div: 693 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 694 break; 695 case ir_binop_mod: 696 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 697 break; 698 699 case ir_binop_less: 700 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 701 inst->conditional_mod = BRW_CONDITIONAL_L; 702 break; 703 case ir_binop_greater: 704 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 705 inst->conditional_mod = BRW_CONDITIONAL_G; 706 break; 707 case ir_binop_lequal: 708 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 709 inst->conditional_mod = BRW_CONDITIONAL_LE; 710 break; 711 case ir_binop_gequal: 712 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 713 inst->conditional_mod = BRW_CONDITIONAL_GE; 714 break; 715 case ir_binop_equal: 716 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 717 inst->conditional_mod = BRW_CONDITIONAL_Z; 718 break; 719 case ir_binop_nequal: 720 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 721 inst->conditional_mod = BRW_CONDITIONAL_NZ; 722 break; 723 724 case ir_binop_logic_xor: 725 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 726 break; 727 728 case ir_binop_logic_or: 729 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 730 break; 731 732 case ir_binop_logic_and: 733 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 734 break; 735 736 case ir_binop_dot: 737 case ir_binop_cross: 738 case ir_unop_any: 739 assert(!"not reached: should be handled by brw_channel_expressions"); 740 break; 741 742 case ir_unop_sqrt: 743 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0])); 744 break; 745 746 case ir_unop_rsq: 747 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0])); 748 break; 749 750 case ir_unop_i2f: 751 case ir_unop_b2f: 752 case ir_unop_b2i: 753 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 
754 break; 755 case ir_unop_f2i: 756 emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0])); 757 break; 758 case ir_unop_f2b: 759 case ir_unop_i2b: 760 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 761 inst->conditional_mod = BRW_CONDITIONAL_NZ; 762 763 case ir_unop_trunc: 764 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 765 break; 766 case ir_unop_ceil: 767 op[0].negate = ~op[0].negate; 768 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 769 this->result.negate = true; 770 break; 771 case ir_unop_floor: 772 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 773 break; 774 case ir_unop_fract: 775 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 776 break; 777 778 case ir_binop_min: 779 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 780 inst->conditional_mod = BRW_CONDITIONAL_L; 781 782 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 783 inst->predicated = true; 784 break; 785 case ir_binop_max: 786 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 787 inst->conditional_mod = BRW_CONDITIONAL_G; 788 789 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 790 inst->predicated = true; 791 break; 792 793 case ir_binop_pow: 794 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1])); 795 break; 796 797 case ir_unop_bit_not: 798 case ir_unop_u2f: 799 case ir_binop_lshift: 800 case ir_binop_rshift: 801 case ir_binop_bit_and: 802 case ir_binop_bit_xor: 803 case ir_binop_bit_or: 804 assert(!"GLSL 1.30 features unsupported"); 805 break; 806 } 807} 808 809void 810fs_visitor::visit(ir_assignment *ir) 811{ 812 struct fs_reg l, r; 813 int i; 814 int write_mask; 815 fs_inst *inst; 816 817 /* FINISHME: arrays on the lhs */ 818 ir->lhs->accept(this); 819 l = this->result; 820 821 ir->rhs->accept(this); 822 r = this->result; 823 824 /* FINISHME: This should really set to the correct maximal writemask for each 825 * FINISHME: component written (in 
the loops below). This case can only 826 * FINISHME: occur for matrices, arrays, and structures. 827 */ 828 if (ir->write_mask == 0) { 829 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 830 write_mask = WRITEMASK_XYZW; 831 } else { 832 assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar()); 833 write_mask = ir->write_mask; 834 } 835 836 assert(l.file != BAD_FILE); 837 assert(r.file != BAD_FILE); 838 839 if (ir->condition) { 840 /* Get the condition bool into the predicate. */ 841 ir->condition->accept(this); 842 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, fs_reg(0))); 843 inst->conditional_mod = BRW_CONDITIONAL_NZ; 844 } 845 846 for (i = 0; i < type_size(ir->lhs->type); i++) { 847 if (i >= 4 || (write_mask & (1 << i))) { 848 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 849 if (ir->condition) 850 inst->predicated = true; 851 } 852 l.reg_offset++; 853 r.reg_offset++; 854 } 855} 856 857void 858fs_visitor::visit(ir_texture *ir) 859{ 860 assert(!"FINISHME"); 861} 862 863void 864fs_visitor::visit(ir_swizzle *ir) 865{ 866 ir->val->accept(this); 867 fs_reg val = this->result; 868 869 fs_reg result = fs_reg(this, ir->type); 870 this->result = result; 871 872 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 873 fs_reg channel = val; 874 int swiz = 0; 875 876 switch (i) { 877 case 0: 878 swiz = ir->mask.x; 879 break; 880 case 1: 881 swiz = ir->mask.y; 882 break; 883 case 2: 884 swiz = ir->mask.z; 885 break; 886 case 3: 887 swiz = ir->mask.w; 888 break; 889 } 890 891 channel.reg_offset += swiz; 892 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 893 result.reg_offset++; 894 } 895} 896 897void 898fs_visitor::visit(ir_discard *ir) 899{ 900 assert(!"FINISHME"); 901} 902 903void 904fs_visitor::visit(ir_constant *ir) 905{ 906 fs_reg reg(this, ir->type); 907 this->result = reg; 908 909 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 910 switch (ir->type->base_type) { 911 case GLSL_TYPE_FLOAT: 912 
emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); 913 break; 914 case GLSL_TYPE_UINT: 915 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); 916 break; 917 case GLSL_TYPE_INT: 918 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); 919 break; 920 case GLSL_TYPE_BOOL: 921 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); 922 break; 923 default: 924 assert(!"Non-float/uint/int/bool constant"); 925 } 926 reg.reg_offset++; 927 } 928} 929 930void 931fs_visitor::visit(ir_if *ir) 932{ 933 assert(!"FINISHME"); 934} 935 936void 937fs_visitor::visit(ir_loop *ir) 938{ 939 assert(!"FINISHME"); 940} 941 942void 943fs_visitor::visit(ir_loop_jump *ir) 944{ 945 assert(!"FINISHME"); 946} 947 948void 949fs_visitor::visit(ir_call *ir) 950{ 951 assert(!"FINISHME"); 952} 953 954void 955fs_visitor::visit(ir_return *ir) 956{ 957 assert(!"FINISHME"); 958} 959 960void 961fs_visitor::visit(ir_function *ir) 962{ 963 /* Ignore function bodies other than main() -- we shouldn't see calls to 964 * them since they should all be inlined before we get to ir_to_mesa. 965 */ 966 if (strcmp(ir->name, "main") == 0) { 967 const ir_function_signature *sig; 968 exec_list empty; 969 970 sig = ir->matching_signature(&empty); 971 972 assert(sig); 973 974 foreach_iter(exec_list_iterator, iter, sig->body) { 975 ir_instruction *ir = (ir_instruction *)iter.get(); 976 this->base_ir = ir; 977 978 ir->accept(this); 979 } 980 } 981} 982 983void 984fs_visitor::visit(ir_function_signature *ir) 985{ 986 assert(!"not reached"); 987 (void)ir; 988} 989 990fs_inst * 991fs_visitor::emit(fs_inst inst) 992{ 993 fs_inst *list_inst = new(mem_ctx) fs_inst; 994 *list_inst = inst; 995 996 list_inst->annotation = this->current_annotation; 997 list_inst->ir = this->base_ir; 998 999 this->instructions.push_tail(list_inst); 1000 1001 return list_inst; 1002} 1003 1004/** Emits a dummy fragment shader consisting of magenta for bringup purposes. 
*/ 1005void 1006fs_visitor::emit_dummy_fs() 1007{ 1008 /* Everyone's favorite color. */ 1009 emit(fs_inst(BRW_OPCODE_MOV, 1010 fs_reg(MRF, 2), 1011 fs_reg(1.0f))); 1012 emit(fs_inst(BRW_OPCODE_MOV, 1013 fs_reg(MRF, 3), 1014 fs_reg(0.0f))); 1015 emit(fs_inst(BRW_OPCODE_MOV, 1016 fs_reg(MRF, 4), 1017 fs_reg(1.0f))); 1018 emit(fs_inst(BRW_OPCODE_MOV, 1019 fs_reg(MRF, 5), 1020 fs_reg(0.0f))); 1021 1022 fs_inst *write; 1023 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1024 fs_reg(0), 1025 fs_reg(0))); 1026} 1027 1028/* The register location here is relative to the start of the URB 1029 * data. It will get adjusted to be a real location before 1030 * generate_code() time. 1031 */ 1032struct brw_reg 1033fs_visitor::interp_reg(int location, int channel) 1034{ 1035 int regnr = location * 2 + channel / 2; 1036 int stride = (channel & 1) * 4; 1037 1038 return brw_vec1_grf(regnr, stride); 1039} 1040 1041/** Emits the interpolation for the varying inputs. */ 1042void 1043fs_visitor::emit_interpolation() 1044{ 1045 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1046 /* For now, the source regs for the setup URB data will be unset, 1047 * since we don't know until codegen how many push constants we'll 1048 * use, and therefore what the setup URB offset is. 
1049 */ 1050 fs_reg src_reg = reg_undef; 1051 1052 this->current_annotation = "compute pixel centers"; 1053 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1054 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1055 emit(fs_inst(BRW_OPCODE_ADD, 1056 this->pixel_x, 1057 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1058 fs_reg(brw_imm_v(0x10101010)))); 1059 emit(fs_inst(BRW_OPCODE_ADD, 1060 this->pixel_y, 1061 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1062 fs_reg(brw_imm_v(0x11001100)))); 1063 1064 this->current_annotation = "compute pixel deltas from v0"; 1065 this->delta_x = fs_reg(this, glsl_type::float_type); 1066 this->delta_y = fs_reg(this, glsl_type::float_type); 1067 emit(fs_inst(BRW_OPCODE_ADD, 1068 this->delta_x, 1069 this->pixel_x, 1070 fs_reg(negate(brw_vec1_grf(1, 0))))); 1071 emit(fs_inst(BRW_OPCODE_ADD, 1072 this->delta_y, 1073 this->pixel_y, 1074 fs_reg(brw_vec1_grf(1, 1)))); 1075 1076 this->current_annotation = "compute pos.w and 1/pos.w"; 1077 /* Compute wpos. Unlike many other varying inputs, we usually need it 1078 * to produce 1/w, and the varying variable wouldn't show up. 1079 */ 1080 fs_reg wpos = fs_reg(this, glsl_type::vec4_type); 1081 this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos; 1082 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */ 1083 wpos.reg_offset++; 1084 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */ 1085 wpos.reg_offset++; 1086 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 1087 interp_reg(FRAG_ATTRIB_WPOS, 2))); 1088 wpos.reg_offset++; 1089 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 1090 interp_reg(FRAG_ATTRIB_WPOS, 3))); 1091 /* Compute the pixel W value from wpos.w. 
*/ 1092 this->pixel_w = fs_reg(this, glsl_type::float_type); 1093 emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos)); 1094 1095 /* FINISHME: gl_FrontFacing */ 1096 1097 foreach_iter(exec_list_iterator, iter, *this->shader->ir) { 1098 ir_instruction *ir = (ir_instruction *)iter.get(); 1099 ir_variable *var = ir->as_variable(); 1100 1101 if (!var) 1102 continue; 1103 1104 if (var->mode != ir_var_in) 1105 continue; 1106 1107 /* If it's already set up (WPOS), skip. */ 1108 if (var->location == 0) 1109 continue; 1110 1111 this->current_annotation = talloc_asprintf(this->mem_ctx, 1112 "interpolate %s " 1113 "(FRAG_ATTRIB[%d])", 1114 var->name, 1115 var->location); 1116 emit_pinterp(var->location); 1117 } 1118 this->current_annotation = NULL; 1119} 1120 1121void 1122fs_visitor::emit_pinterp(int location) 1123{ 1124 fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type); 1125 this->interp_attrs[location] = interp_attr; 1126 1127 for (unsigned int i = 0; i < 4; i++) { 1128 struct brw_reg interp = interp_reg(location, i); 1129 emit(fs_inst(FS_OPCODE_LINTERP, 1130 interp_attr, 1131 this->delta_x, 1132 this->delta_y, 1133 fs_reg(interp))); 1134 interp_attr.reg_offset++; 1135 } 1136 interp_attr.reg_offset -= 4; 1137 1138 for (unsigned int i = 0; i < 4; i++) { 1139 emit(fs_inst(BRW_OPCODE_MUL, 1140 interp_attr, 1141 interp_attr, 1142 this->pixel_w)); 1143 interp_attr.reg_offset++; 1144 } 1145} 1146 1147void 1148fs_visitor::emit_fb_writes() 1149{ 1150 this->current_annotation = "FB write"; 1151 1152 assert(this->frag_color || !"FINISHME: MRT"); 1153 fs_reg color = *(variable_storage(this->frag_color)); 1154 1155 for (int i = 0; i < 4; i++) { 1156 emit(fs_inst(BRW_OPCODE_MOV, 1157 fs_reg(MRF, 2 + i), 1158 color)); 1159 color.reg_offset++; 1160 } 1161 1162 emit(fs_inst(FS_OPCODE_FB_WRITE, 1163 fs_reg(0), 1164 fs_reg(0))); 1165 1166 this->current_annotation = NULL; 1167} 1168 1169void 1170fs_visitor::generate_fb_write(fs_inst *inst) 1171{ 1172 GLboolean eot = 1; /* FINISHME: MRT */ 
1173 /* FINISHME: AADS */ 1174 1175 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 1176 * move, here's g1. 1177 */ 1178 brw_push_insn_state(p); 1179 brw_set_mask_control(p, BRW_MASK_DISABLE); 1180 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1181 brw_MOV(p, 1182 brw_message_reg(1), 1183 brw_vec8_grf(1, 0)); 1184 brw_pop_insn_state(p); 1185 1186 int nr = 2 + 4; 1187 1188 brw_fb_WRITE(p, 1189 8, /* dispatch_width */ 1190 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 1191 0, /* base MRF */ 1192 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 1193 0, /* FINISHME: MRT target */ 1194 nr, 1195 0, 1196 eot); 1197} 1198 1199void 1200fs_visitor::generate_linterp(fs_inst *inst, 1201 struct brw_reg dst, struct brw_reg *src) 1202{ 1203 struct brw_reg delta_x = src[0]; 1204 struct brw_reg delta_y = src[1]; 1205 struct brw_reg interp = src[2]; 1206 1207 if (brw->has_pln && 1208 delta_y.nr == delta_x.nr + 1 && 1209 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { 1210 brw_PLN(p, dst, interp, delta_x); 1211 } else { 1212 brw_LINE(p, brw_null_reg(), interp, delta_x); 1213 brw_MAC(p, dst, suboffset(interp, 1), delta_y); 1214 } 1215} 1216 1217void 1218fs_visitor::generate_math(fs_inst *inst, 1219 struct brw_reg dst, struct brw_reg *src) 1220{ 1221 int op; 1222 1223 switch (inst->opcode) { 1224 case FS_OPCODE_RCP: 1225 op = BRW_MATH_FUNCTION_INV; 1226 break; 1227 case FS_OPCODE_RSQ: 1228 op = BRW_MATH_FUNCTION_RSQ; 1229 break; 1230 case FS_OPCODE_SQRT: 1231 op = BRW_MATH_FUNCTION_SQRT; 1232 break; 1233 case FS_OPCODE_EXP2: 1234 op = BRW_MATH_FUNCTION_EXP; 1235 break; 1236 case FS_OPCODE_LOG2: 1237 op = BRW_MATH_FUNCTION_LOG; 1238 break; 1239 case FS_OPCODE_POW: 1240 op = BRW_MATH_FUNCTION_POW; 1241 break; 1242 case FS_OPCODE_SIN: 1243 op = BRW_MATH_FUNCTION_SIN; 1244 break; 1245 case FS_OPCODE_COS: 1246 op = BRW_MATH_FUNCTION_COS; 1247 break; 1248 default: 1249 assert(!"not reached: unknown math function"); 1250 op = 0; 1251 break; 1252 } 1253 1254 if 
(inst->opcode == FS_OPCODE_POW) { 1255 brw_MOV(p, brw_message_reg(3), src[1]); 1256 } 1257 1258 brw_math(p, dst, 1259 op, 1260 inst->saturate ? BRW_MATH_SATURATE_SATURATE : 1261 BRW_MATH_SATURATE_NONE, 1262 2, src[0], 1263 BRW_MATH_DATA_VECTOR, 1264 BRW_MATH_PRECISION_FULL); 1265} 1266 1267static void 1268trivial_assign_reg(int header_size, fs_reg *reg) 1269{ 1270 if (reg->file == GRF && reg->reg != 0) { 1271 reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset; 1272 reg->reg = 0; 1273 } 1274} 1275 1276void 1277fs_visitor::assign_curb_setup() 1278{ 1279 c->prog_data.first_curbe_grf = c->key.nr_payload_regs; 1280 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; 1281 1282 /* Map the offsets in the UNIFORM file to fixed HW regs. */ 1283 foreach_iter(exec_list_iterator, iter, this->instructions) { 1284 fs_inst *inst = (fs_inst *)iter.get(); 1285 1286 for (unsigned int i = 0; i < 3; i++) { 1287 if (inst->src[i].file == UNIFORM) { 1288 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; 1289 struct brw_reg brw_reg; 1290 1291 brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf + 1292 constant_nr / 8, 1293 constant_nr % 8); 1294 inst->src[i] = fs_reg(brw_reg); 1295 } 1296 } 1297 } 1298} 1299 1300void 1301fs_visitor::assign_urb_setup() 1302{ 1303 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length; 1304 int interp_reg_nr[FRAG_ATTRIB_MAX]; 1305 1306 c->prog_data.urb_read_length = 0; 1307 1308 /* Figure out where each of the incoming setup attributes lands. */ 1309 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { 1310 interp_reg_nr[i] = -1; 1311 1312 if (i != FRAG_ATTRIB_WPOS && 1313 !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i))) 1314 continue; 1315 1316 /* Each attribute is 4 setup channels, each of which is half a reg. 
*/ 1317 interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length; 1318 c->prog_data.urb_read_length += 2; 1319 } 1320 1321 /* Map the register numbers for FS_OPCODE_LINTERP so that it uses 1322 * the correct setup input. 1323 */ 1324 foreach_iter(exec_list_iterator, iter, this->instructions) { 1325 fs_inst *inst = (fs_inst *)iter.get(); 1326 1327 if (inst->opcode != FS_OPCODE_LINTERP) 1328 continue; 1329 1330 assert(inst->src[2].file == FIXED_HW_REG); 1331 1332 int location = inst->src[2].fixed_hw_reg.nr / 2; 1333 assert(interp_reg_nr[location] != -1); 1334 inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] + 1335 (inst->src[2].fixed_hw_reg.nr & 1)); 1336 } 1337 1338 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; 1339} 1340 1341void 1342fs_visitor::assign_regs() 1343{ 1344 int header_size = this->first_non_payload_grf; 1345 int last_grf = 0; 1346 1347 /* FINISHME: trivial assignment of register numbers */ 1348 foreach_iter(exec_list_iterator, iter, this->instructions) { 1349 fs_inst *inst = (fs_inst *)iter.get(); 1350 1351 trivial_assign_reg(header_size, &inst->dst); 1352 trivial_assign_reg(header_size, &inst->src[0]); 1353 trivial_assign_reg(header_size, &inst->src[1]); 1354 1355 last_grf = MAX2(last_grf, inst->dst.hw_reg); 1356 last_grf = MAX2(last_grf, inst->src[0].hw_reg); 1357 last_grf = MAX2(last_grf, inst->src[1].hw_reg); 1358 } 1359 1360 this->grf_used = last_grf; 1361} 1362 1363static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) 1364{ 1365 struct brw_reg brw_reg; 1366 1367 switch (reg->file) { 1368 case GRF: 1369 case ARF: 1370 case MRF: 1371 brw_reg = brw_vec8_reg(reg->file, 1372 reg->hw_reg, 0); 1373 brw_reg = retype(brw_reg, reg->type); 1374 break; 1375 case IMM: 1376 switch (reg->type) { 1377 case BRW_REGISTER_TYPE_F: 1378 brw_reg = brw_imm_f(reg->imm.f); 1379 break; 1380 case BRW_REGISTER_TYPE_D: 1381 brw_reg = brw_imm_f(reg->imm.i); 1382 break; 1383 case BRW_REGISTER_TYPE_UD: 1384 brw_reg = brw_imm_f(reg->imm.u); 
1385 break; 1386 default: 1387 assert(!"not reached"); 1388 break; 1389 } 1390 break; 1391 case FIXED_HW_REG: 1392 brw_reg = reg->fixed_hw_reg; 1393 break; 1394 case BAD_FILE: 1395 /* Probably unused. */ 1396 brw_reg = brw_null_reg(); 1397 break; 1398 case UNIFORM: 1399 assert(!"not reached"); 1400 brw_reg = brw_null_reg(); 1401 break; 1402 } 1403 if (reg->abs) 1404 brw_reg = brw_abs(brw_reg); 1405 if (reg->negate) 1406 brw_reg = negate(brw_reg); 1407 1408 return brw_reg; 1409} 1410 1411void 1412fs_visitor::generate_code() 1413{ 1414 unsigned int annotation_len = 0; 1415 int last_native_inst = 0; 1416 1417 foreach_iter(exec_list_iterator, iter, this->instructions) { 1418 fs_inst *inst = (fs_inst *)iter.get(); 1419 struct brw_reg src[3], dst; 1420 1421 for (unsigned int i = 0; i < 3; i++) { 1422 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 1423 } 1424 dst = brw_reg_from_fs_reg(&inst->dst); 1425 1426 brw_set_conditionalmod(p, inst->conditional_mod); 1427 brw_set_predicate_control(p, inst->predicated); 1428 1429 switch (inst->opcode) { 1430 case BRW_OPCODE_MOV: 1431 brw_MOV(p, dst, src[0]); 1432 break; 1433 case BRW_OPCODE_ADD: 1434 brw_ADD(p, dst, src[0], src[1]); 1435 break; 1436 case BRW_OPCODE_MUL: 1437 brw_MUL(p, dst, src[0], src[1]); 1438 break; 1439 case FS_OPCODE_RCP: 1440 case FS_OPCODE_RSQ: 1441 case FS_OPCODE_SQRT: 1442 case FS_OPCODE_EXP2: 1443 case FS_OPCODE_LOG2: 1444 case FS_OPCODE_POW: 1445 case FS_OPCODE_SIN: 1446 case FS_OPCODE_COS: 1447 generate_math(inst, dst, src); 1448 break; 1449 case FS_OPCODE_LINTERP: 1450 generate_linterp(inst, dst, src); 1451 break; 1452 case FS_OPCODE_FB_WRITE: 1453 generate_fb_write(inst); 1454 break; 1455 default: 1456 assert(!"not reached"); 1457 } 1458 1459 if (annotation_len < p->nr_insn) { 1460 annotation_len *= 2; 1461 if (annotation_len < 16) 1462 annotation_len = 16; 1463 1464 this->annotation_string = talloc_realloc(this->mem_ctx, 1465 annotation_string, 1466 const char *, 1467 annotation_len); 1468 
this->annotation_ir = talloc_realloc(this->mem_ctx, 1469 annotation_ir, 1470 ir_instruction *, 1471 annotation_len); 1472 } 1473 1474 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 1475 this->annotation_string[i] = inst->annotation; 1476 this->annotation_ir[i] = inst->ir; 1477 } 1478 last_native_inst = p->nr_insn; 1479 } 1480} 1481 1482GLboolean 1483brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 1484{ 1485 struct brw_compile *p = &c->func; 1486 struct intel_context *intel = &brw->intel; 1487 GLcontext *ctx = &intel->ctx; 1488 struct brw_shader *shader = NULL; 1489 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 1490 1491 if (!prog) 1492 return GL_FALSE; 1493 1494 if (!using_new_fs) 1495 return GL_FALSE; 1496 1497 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 1498 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 1499 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 1500 break; 1501 } 1502 } 1503 if (!shader) 1504 return GL_FALSE; 1505 1506 /* We always use 8-wide mode, at least for now. For one, flow 1507 * control only works in 8-wide. Also, when we're fragment shader 1508 * bound, we're almost always under register pressure as well, so 1509 * 8-wide would save us from the performance cliff of spilling 1510 * regs. 1511 */ 1512 c->dispatch_width = 8; 1513 1514 if (INTEL_DEBUG & DEBUG_WM) { 1515 printf("GLSL IR for native fragment shader %d:\n", prog->Name); 1516 _mesa_print_ir(shader->ir, NULL); 1517 printf("\n"); 1518 } 1519 1520 /* Now the main event: Visit the shader IR and generate our FS IR for it. 1521 */ 1522 fs_visitor v(c, shader); 1523 1524 if (0) { 1525 v.emit_dummy_fs(); 1526 } else { 1527 v.emit_interpolation(); 1528 1529 /* Generate FS IR for main(). (the visitor only descends into 1530 * functions called "main"). 
1531 */ 1532 foreach_iter(exec_list_iterator, iter, *shader->ir) { 1533 ir_instruction *ir = (ir_instruction *)iter.get(); 1534 v.base_ir = ir; 1535 ir->accept(&v); 1536 } 1537 1538 if (v.fail) 1539 return GL_FALSE; 1540 1541 v.emit_fb_writes(); 1542 v.assign_curb_setup(); 1543 v.assign_urb_setup(); 1544 v.assign_regs(); 1545 } 1546 1547 v.generate_code(); 1548 1549 if (INTEL_DEBUG & DEBUG_WM) { 1550 const char *last_annotation_string = NULL; 1551 ir_instruction *last_annotation_ir = NULL; 1552 1553 printf("Native code for fragment shader %d:\n", prog->Name); 1554 for (unsigned int i = 0; i < p->nr_insn; i++) { 1555 if (last_annotation_ir != v.annotation_ir[i]) { 1556 last_annotation_ir = v.annotation_ir[i]; 1557 if (last_annotation_ir) { 1558 printf(" "); 1559 last_annotation_ir->print(); 1560 printf("\n"); 1561 } 1562 } 1563 if (last_annotation_string != v.annotation_string[i]) { 1564 last_annotation_string = v.annotation_string[i]; 1565 if (last_annotation_string) 1566 printf(" %s\n", last_annotation_string); 1567 } 1568 brw_disasm(stdout, &p->store[i], intel->gen); 1569 } 1570 printf("\n"); 1571 } 1572 1573 c->prog_data.total_grf = v.grf_used; 1574 c->prog_data.total_scratch = 0; 1575 1576 return GL_TRUE; 1577} 1578