/* brw_fs.cpp revision ff0eb45f47ebf2fcc1af06a8b6b934c79dff1d41 */
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "program/prog_parameter.h" 35#include "program/prog_print.h" 36#include "program/prog_optimize.h" 37#include "program/sampler.h" 38#include "program/hash_table.h" 39#include "brw_context.h" 40#include "brw_eu.h" 41#include "brw_wm.h" 42#include "talloc.h" 43} 44#include "../glsl/glsl_types.h" 45#include "../glsl/ir_optimization.h" 46#include "../glsl/ir_print_visitor.h" 47 48enum register_file { 49 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 50 GRF = BRW_GENERAL_REGISTER_FILE, 51 MRF = BRW_MESSAGE_REGISTER_FILE, 52 IMM = BRW_IMMEDIATE_VALUE, 53 FIXED_HW_REG, /* a struct brw_reg */ 54 UNIFORM, /* prog_data->params[hw_reg] */ 55 BAD_FILE 56}; 57 58enum fs_opcodes { 59 FS_OPCODE_FB_WRITE = 256, 60 FS_OPCODE_RCP, 61 FS_OPCODE_RSQ, 62 FS_OPCODE_SQRT, 63 FS_OPCODE_EXP2, 64 FS_OPCODE_LOG2, 65 FS_OPCODE_POW, 66 FS_OPCODE_SIN, 67 FS_OPCODE_COS, 68 FS_OPCODE_DDX, 69 FS_OPCODE_DDY, 70 FS_OPCODE_LINTERP, 71 FS_OPCODE_TEX, 72 FS_OPCODE_TXB, 73 FS_OPCODE_TXL, 74 FS_OPCODE_DISCARD, 75}; 76 77static int using_new_fs = -1; 78static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); 79 80struct gl_shader * 81brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) 82{ 83 struct brw_shader *shader; 84 85 shader = talloc_zero(NULL, struct brw_shader); 86 if (shader) { 87 shader->base.Type = type; 88 shader->base.Name = name; 89 _mesa_init_shader(ctx, &shader->base); 90 } 91 92 return &shader->base; 93} 94 95struct gl_shader_program * 96brw_new_shader_program(GLcontext *ctx, GLuint name) 97{ 98 struct brw_shader_program *prog; 99 prog = talloc_zero(NULL, struct brw_shader_program); 100 if (prog) { 101 prog->base.Name = name; 102 _mesa_init_shader_program(ctx, &prog->base); 103 } 104 return &prog->base; 105} 106 107GLboolean 108brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) 109{ 110 if 
(!_mesa_ir_compile_shader(ctx, shader)) 111 return GL_FALSE; 112 113 return GL_TRUE; 114} 115 116GLboolean 117brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) 118{ 119 if (using_new_fs == -1) 120 using_new_fs = getenv("INTEL_NEW_FS") != NULL; 121 122 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { 123 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; 124 125 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) { 126 void *mem_ctx = talloc_new(NULL); 127 bool progress; 128 129 if (shader->ir) 130 talloc_free(shader->ir); 131 shader->ir = new(shader) exec_list; 132 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 133 134 do_mat_op_to_vec(shader->ir); 135 do_mod_to_fract(shader->ir); 136 do_div_to_mul_rcp(shader->ir); 137 do_sub_to_add_neg(shader->ir); 138 do_explog_to_explog2(shader->ir); 139 140 do { 141 progress = false; 142 143 brw_do_channel_expressions(shader->ir); 144 brw_do_vector_splitting(shader->ir); 145 146 progress = do_lower_jumps(shader->ir, true, true, 147 true, /* main return */ 148 false, /* continue */ 149 false /* loops */ 150 ) || progress; 151 152 progress = do_common_optimization(shader->ir, true, 32) || progress; 153 154 progress = lower_noise(shader->ir) || progress; 155 progress = 156 lower_variable_index_to_cond_assign(shader->ir, 157 GL_TRUE, /* input */ 158 GL_TRUE, /* output */ 159 GL_TRUE, /* temp */ 160 GL_TRUE /* uniform */ 161 ) || progress; 162 } while (progress); 163 164 validate_ir_tree(shader->ir); 165 166 reparent_ir(shader->ir, shader->ir); 167 talloc_free(mem_ctx); 168 } 169 } 170 171 if (!_mesa_ir_link_shader(ctx, prog)) 172 return GL_FALSE; 173 174 return GL_TRUE; 175} 176 177static int 178type_size(const struct glsl_type *type) 179{ 180 unsigned int size, i; 181 182 switch (type->base_type) { 183 case GLSL_TYPE_UINT: 184 case GLSL_TYPE_INT: 185 case GLSL_TYPE_FLOAT: 186 case GLSL_TYPE_BOOL: 187 return type->components(); 188 case GLSL_TYPE_ARRAY: 189 /* FINISHME: 
uniform/varying arrays. */ 190 return type_size(type->fields.array) * type->length; 191 case GLSL_TYPE_STRUCT: 192 size = 0; 193 for (i = 0; i < type->length; i++) { 194 size += type_size(type->fields.structure[i].type); 195 } 196 return size; 197 case GLSL_TYPE_SAMPLER: 198 /* Samplers take up no register space, since they're baked in at 199 * link time. 200 */ 201 return 0; 202 default: 203 assert(!"not reached"); 204 return 0; 205 } 206} 207 208class fs_reg { 209public: 210 /* Callers of this talloc-based new need not call delete. It's 211 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 212 static void* operator new(size_t size, void *ctx) 213 { 214 void *node; 215 216 node = talloc_size(ctx, size); 217 assert(node != NULL); 218 219 return node; 220 } 221 222 void init() 223 { 224 this->reg = 0; 225 this->reg_offset = 0; 226 this->negate = 0; 227 this->abs = 0; 228 this->hw_reg = -1; 229 } 230 231 /** Generic unset register constructor. */ 232 fs_reg() 233 { 234 init(); 235 this->file = BAD_FILE; 236 } 237 238 /** Immediate value constructor. */ 239 fs_reg(float f) 240 { 241 init(); 242 this->file = IMM; 243 this->type = BRW_REGISTER_TYPE_F; 244 this->imm.f = f; 245 } 246 247 /** Immediate value constructor. */ 248 fs_reg(int32_t i) 249 { 250 init(); 251 this->file = IMM; 252 this->type = BRW_REGISTER_TYPE_D; 253 this->imm.i = i; 254 } 255 256 /** Immediate value constructor. */ 257 fs_reg(uint32_t u) 258 { 259 init(); 260 this->file = IMM; 261 this->type = BRW_REGISTER_TYPE_UD; 262 this->imm.u = u; 263 } 264 265 /** Fixed brw_reg Immediate value constructor. */ 266 fs_reg(struct brw_reg fixed_hw_reg) 267 { 268 init(); 269 this->file = FIXED_HW_REG; 270 this->fixed_hw_reg = fixed_hw_reg; 271 this->type = fixed_hw_reg.type; 272 } 273 274 fs_reg(enum register_file file, int hw_reg); 275 fs_reg(class fs_visitor *v, const struct glsl_type *type); 276 277 /** Register file: ARF, GRF, MRF, IMM. 
*/ 278 enum register_file file; 279 /** Abstract register number. 0 = fixed hw reg */ 280 int reg; 281 /** Offset within the abstract register. */ 282 int reg_offset; 283 /** HW register number. Generally unset until register allocation. */ 284 int hw_reg; 285 /** Register type. BRW_REGISTER_TYPE_* */ 286 int type; 287 bool negate; 288 bool abs; 289 struct brw_reg fixed_hw_reg; 290 291 /** Value for file == BRW_IMMMEDIATE_FILE */ 292 union { 293 int32_t i; 294 uint32_t u; 295 float f; 296 } imm; 297}; 298 299static const fs_reg reg_undef; 300static const fs_reg reg_null(ARF, BRW_ARF_NULL); 301 302class fs_inst : public exec_node { 303public: 304 /* Callers of this talloc-based new need not call delete. It's 305 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 306 static void* operator new(size_t size, void *ctx) 307 { 308 void *node; 309 310 node = talloc_zero_size(ctx, size); 311 assert(node != NULL); 312 313 return node; 314 } 315 316 void init() 317 { 318 this->opcode = BRW_OPCODE_NOP; 319 this->saturate = false; 320 this->conditional_mod = BRW_CONDITIONAL_NONE; 321 this->predicated = false; 322 this->sampler = 0; 323 this->target = 0; 324 this->eot = false; 325 this->shadow_compare = false; 326 } 327 328 fs_inst() 329 { 330 init(); 331 } 332 333 fs_inst(int opcode) 334 { 335 init(); 336 this->opcode = opcode; 337 } 338 339 fs_inst(int opcode, fs_reg dst, fs_reg src0) 340 { 341 init(); 342 this->opcode = opcode; 343 this->dst = dst; 344 this->src[0] = src0; 345 } 346 347 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) 348 { 349 init(); 350 this->opcode = opcode; 351 this->dst = dst; 352 this->src[0] = src0; 353 this->src[1] = src1; 354 } 355 356 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 357 { 358 init(); 359 this->opcode = opcode; 360 this->dst = dst; 361 this->src[0] = src0; 362 this->src[1] = src1; 363 this->src[2] = src2; 364 } 365 366 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 367 fs_reg dst; 368 
fs_reg src[3]; 369 bool saturate; 370 bool predicated; 371 int conditional_mod; /**< BRW_CONDITIONAL_* */ 372 373 int mlen; /**< SEND message length */ 374 int sampler; 375 int target; /**< MRT target. */ 376 bool eot; 377 bool shadow_compare; 378 379 /** @{ 380 * Annotation for the generated IR. One of the two can be set. 381 */ 382 ir_instruction *ir; 383 const char *annotation; 384 /** @} */ 385}; 386 387class fs_visitor : public ir_visitor 388{ 389public: 390 391 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) 392 { 393 this->c = c; 394 this->p = &c->func; 395 this->brw = p->brw; 396 this->fp = brw->fragment_program; 397 this->intel = &brw->intel; 398 this->ctx = &intel->ctx; 399 this->mem_ctx = talloc_new(NULL); 400 this->shader = shader; 401 this->fail = false; 402 this->next_abstract_grf = 1; 403 this->variable_ht = hash_table_ctor(0, 404 hash_table_pointer_hash, 405 hash_table_pointer_compare); 406 407 this->frag_color = NULL; 408 this->frag_data = NULL; 409 this->frag_depth = NULL; 410 this->first_non_payload_grf = 0; 411 412 this->current_annotation = NULL; 413 this->annotation_string = NULL; 414 this->annotation_ir = NULL; 415 this->base_ir = NULL; 416 } 417 ~fs_visitor() 418 { 419 talloc_free(this->mem_ctx); 420 hash_table_dtor(this->variable_ht); 421 } 422 423 fs_reg *variable_storage(ir_variable *var); 424 425 void visit(ir_variable *ir); 426 void visit(ir_assignment *ir); 427 void visit(ir_dereference_variable *ir); 428 void visit(ir_dereference_record *ir); 429 void visit(ir_dereference_array *ir); 430 void visit(ir_expression *ir); 431 void visit(ir_texture *ir); 432 void visit(ir_if *ir); 433 void visit(ir_constant *ir); 434 void visit(ir_swizzle *ir); 435 void visit(ir_return *ir); 436 void visit(ir_loop *ir); 437 void visit(ir_loop_jump *ir); 438 void visit(ir_discard *ir); 439 void visit(ir_call *ir); 440 void visit(ir_function *ir); 441 void visit(ir_function_signature *ir); 442 443 fs_inst *emit(fs_inst inst); 444 void 
assign_curb_setup(); 445 void assign_urb_setup(); 446 void assign_regs(); 447 void generate_code(); 448 void generate_fb_write(fs_inst *inst); 449 void generate_linterp(fs_inst *inst, struct brw_reg dst, 450 struct brw_reg *src); 451 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 452 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); 453 void generate_discard(fs_inst *inst); 454 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 455 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 456 457 void emit_dummy_fs(); 458 void emit_fragcoord_interpolation(ir_variable *ir); 459 void emit_general_interpolation(ir_variable *ir); 460 void emit_interpolation_setup(); 461 void emit_fb_writes(); 462 463 struct brw_reg interp_reg(int location, int channel); 464 int setup_uniform_values(int loc, const glsl_type *type); 465 466 struct brw_context *brw; 467 const struct gl_fragment_program *fp; 468 struct intel_context *intel; 469 GLcontext *ctx; 470 struct brw_wm_compile *c; 471 struct brw_compile *p; 472 struct brw_shader *shader; 473 void *mem_ctx; 474 exec_list instructions; 475 int next_abstract_grf; 476 struct hash_table *variable_ht; 477 ir_variable *frag_color, *frag_data, *frag_depth; 478 int first_non_payload_grf; 479 480 /** @{ debug annotation info */ 481 const char *current_annotation; 482 ir_instruction *base_ir; 483 const char **annotation_string; 484 ir_instruction **annotation_ir; 485 /** @} */ 486 487 bool fail; 488 489 /* Result of last visit() method. */ 490 fs_reg result; 491 492 fs_reg pixel_x; 493 fs_reg pixel_y; 494 fs_reg wpos_w; 495 fs_reg pixel_w; 496 fs_reg delta_x; 497 fs_reg delta_y; 498 499 int grf_used; 500 501}; 502 503/** Fixed HW reg constructor. 
*/ 504fs_reg::fs_reg(enum register_file file, int hw_reg) 505{ 506 init(); 507 this->file = file; 508 this->hw_reg = hw_reg; 509 this->type = BRW_REGISTER_TYPE_F; 510} 511 512int 513brw_type_for_base_type(const struct glsl_type *type) 514{ 515 switch (type->base_type) { 516 case GLSL_TYPE_FLOAT: 517 return BRW_REGISTER_TYPE_F; 518 case GLSL_TYPE_INT: 519 case GLSL_TYPE_BOOL: 520 return BRW_REGISTER_TYPE_D; 521 case GLSL_TYPE_UINT: 522 return BRW_REGISTER_TYPE_UD; 523 case GLSL_TYPE_ARRAY: 524 case GLSL_TYPE_STRUCT: 525 /* These should be overridden with the type of the member when 526 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 527 * way to trip up if we don't. 528 */ 529 return BRW_REGISTER_TYPE_UD; 530 default: 531 assert(!"not reached"); 532 return BRW_REGISTER_TYPE_F; 533 } 534} 535 536/** Automatic reg constructor. */ 537fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 538{ 539 init(); 540 541 this->file = GRF; 542 this->reg = v->next_abstract_grf; 543 this->reg_offset = 0; 544 v->next_abstract_grf += type_size(type); 545 this->type = brw_type_for_base_type(type); 546} 547 548fs_reg * 549fs_visitor::variable_storage(ir_variable *var) 550{ 551 return (fs_reg *)hash_table_find(this->variable_ht, var); 552} 553 554/* Our support for uniforms is piggy-backed on the struct 555 * gl_fragment_program, because that's where the values actually 556 * get stored, rather than in some global gl_shader_program uniform 557 * store. 
558 */ 559int 560fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 561{ 562 unsigned int offset = 0; 563 float *vec_values; 564 565 if (type->is_matrix()) { 566 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 567 type->vector_elements, 568 1); 569 570 for (unsigned int i = 0; i < type->matrix_columns; i++) { 571 offset += setup_uniform_values(loc + offset, column); 572 } 573 574 return offset; 575 } 576 577 switch (type->base_type) { 578 case GLSL_TYPE_FLOAT: 579 case GLSL_TYPE_UINT: 580 case GLSL_TYPE_INT: 581 case GLSL_TYPE_BOOL: 582 vec_values = fp->Base.Parameters->ParameterValues[loc]; 583 for (unsigned int i = 0; i < type->vector_elements; i++) { 584 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 585 } 586 return 1; 587 588 case GLSL_TYPE_STRUCT: 589 for (unsigned int i = 0; i < type->length; i++) { 590 offset += setup_uniform_values(loc + offset, 591 type->fields.structure[i].type); 592 } 593 return offset; 594 595 case GLSL_TYPE_ARRAY: 596 for (unsigned int i = 0; i < type->length; i++) { 597 offset += setup_uniform_values(loc + offset, type->fields.array); 598 } 599 return offset; 600 601 case GLSL_TYPE_SAMPLER: 602 /* The sampler takes up a slot, but we don't use any values from it. 
*/ 603 return 1; 604 605 default: 606 assert(!"not reached"); 607 return 0; 608 } 609} 610 611void 612fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 613{ 614 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 615 fs_reg wpos = *reg; 616 fs_reg neg_y = this->pixel_y; 617 neg_y.negate = true; 618 619 /* gl_FragCoord.x */ 620 if (ir->pixel_center_integer) { 621 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 622 } else { 623 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 624 } 625 wpos.reg_offset++; 626 627 /* gl_FragCoord.y */ 628 if (ir->origin_upper_left && ir->pixel_center_integer) { 629 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 630 } else { 631 fs_reg pixel_y = this->pixel_y; 632 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 633 634 if (!ir->origin_upper_left) { 635 pixel_y.negate = true; 636 offset += c->key.drawable_height - 1.0; 637 } 638 639 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 640 } 641 wpos.reg_offset++; 642 643 /* gl_FragCoord.z */ 644 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 645 interp_reg(FRAG_ATTRIB_WPOS, 2))); 646 wpos.reg_offset++; 647 648 /* gl_FragCoord.w: Already set up in emit_interpolation */ 649 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 650 651 hash_table_insert(this->variable_ht, reg, ir); 652} 653 654 655void 656fs_visitor::emit_general_interpolation(ir_variable *ir) 657{ 658 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 659 /* Interpolation is always in floating point regs. 
*/ 660 reg->type = BRW_REGISTER_TYPE_F; 661 fs_reg attr = *reg; 662 663 unsigned int array_elements; 664 const glsl_type *type; 665 666 if (ir->type->is_array()) { 667 array_elements = ir->type->length; 668 if (array_elements == 0) { 669 this->fail = true; 670 } 671 type = ir->type->fields.array; 672 } else { 673 array_elements = 1; 674 type = ir->type; 675 } 676 677 int location = ir->location; 678 for (unsigned int i = 0; i < array_elements; i++) { 679 for (unsigned int j = 0; j < type->matrix_columns; j++) { 680 if (!(fp->Base.InputsRead & BITFIELD64_BIT(location))) { 681 /* If there's no incoming setup data for this slot, don't 682 * emit interpolation for it (since it's not used, and 683 * we'd fall over later trying to find the setup data. 684 */ 685 attr.reg_offset += type->vector_elements; 686 continue; 687 } 688 689 for (unsigned int c = 0; c < type->vector_elements; c++) { 690 struct brw_reg interp = interp_reg(location, c); 691 emit(fs_inst(FS_OPCODE_LINTERP, 692 attr, 693 this->delta_x, 694 this->delta_y, 695 fs_reg(interp))); 696 attr.reg_offset++; 697 } 698 attr.reg_offset -= type->vector_elements; 699 700 for (unsigned int c = 0; c < type->vector_elements; c++) { 701 emit(fs_inst(BRW_OPCODE_MUL, 702 attr, 703 attr, 704 this->pixel_w)); 705 attr.reg_offset++; 706 } 707 location++; 708 } 709 } 710 711 hash_table_insert(this->variable_ht, reg, ir); 712} 713 714void 715fs_visitor::visit(ir_variable *ir) 716{ 717 fs_reg *reg = NULL; 718 719 if (variable_storage(ir)) 720 return; 721 722 if (strcmp(ir->name, "gl_FragColor") == 0) { 723 this->frag_color = ir; 724 } else if (strcmp(ir->name, "gl_FragData") == 0) { 725 this->frag_data = ir; 726 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 727 this->frag_depth = ir; 728 } 729 730 if (ir->mode == ir_var_in) { 731 if (!strcmp(ir->name, "gl_FragCoord")) { 732 emit_fragcoord_interpolation(ir); 733 return; 734 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 735 reg = new(this->mem_ctx) fs_reg(this, 
ir->type); 736 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 737 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 738 * us front face 739 */ 740 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 741 *reg, 742 fs_reg(r1_6ud), 743 fs_reg(1u << 31))); 744 inst->conditional_mod = BRW_CONDITIONAL_L; 745 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 746 } else { 747 emit_general_interpolation(ir); 748 return; 749 } 750 } 751 752 if (ir->mode == ir_var_uniform) { 753 int param_index = c->prog_data.nr_params; 754 755 setup_uniform_values(ir->location, ir->type); 756 757 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 758 } 759 760 if (!reg) 761 reg = new(this->mem_ctx) fs_reg(this, ir->type); 762 763 hash_table_insert(this->variable_ht, reg, ir); 764} 765 766void 767fs_visitor::visit(ir_dereference_variable *ir) 768{ 769 fs_reg *reg = variable_storage(ir->var); 770 this->result = *reg; 771} 772 773void 774fs_visitor::visit(ir_dereference_record *ir) 775{ 776 const glsl_type *struct_type = ir->record->type; 777 778 ir->record->accept(this); 779 780 unsigned int offset = 0; 781 for (unsigned int i = 0; i < struct_type->length; i++) { 782 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 783 break; 784 offset += type_size(struct_type->fields.structure[i].type); 785 } 786 this->result.reg_offset += offset; 787 this->result.type = brw_type_for_base_type(ir->type); 788} 789 790void 791fs_visitor::visit(ir_dereference_array *ir) 792{ 793 ir_constant *index; 794 int element_size; 795 796 ir->array->accept(this); 797 index = ir->array_index->as_constant(); 798 799 element_size = type_size(ir->type); 800 this->result.type = brw_type_for_base_type(ir->type); 801 802 if (index) { 803 assert(this->result.file == UNIFORM || 804 (this->result.file == GRF && 805 this->result.reg != 0)); 806 this->result.reg_offset += index->value.i[0] * element_size; 807 } else { 808 assert(!"FINISHME: non-constant array 
element"); 809 } 810} 811 812void 813fs_visitor::visit(ir_expression *ir) 814{ 815 unsigned int operand; 816 fs_reg op[2], temp; 817 fs_reg result; 818 fs_inst *inst; 819 820 for (operand = 0; operand < ir->get_num_operands(); operand++) { 821 ir->operands[operand]->accept(this); 822 if (this->result.file == BAD_FILE) { 823 ir_print_visitor v; 824 printf("Failed to get tree for expression operand:\n"); 825 ir->operands[operand]->accept(&v); 826 this->fail = true; 827 } 828 op[operand] = this->result; 829 830 /* Matrix expression operands should have been broken down to vector 831 * operations already. 832 */ 833 assert(!ir->operands[operand]->type->is_matrix()); 834 /* And then those vector operands should have been broken down to scalar. 835 */ 836 assert(!ir->operands[operand]->type->is_vector()); 837 } 838 839 /* Storage for our result. If our result goes into an assignment, it will 840 * just get copy-propagated out, so no worries. 841 */ 842 this->result = fs_reg(this, ir->type); 843 844 switch (ir->operation) { 845 case ir_unop_logic_not: 846 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 847 break; 848 case ir_unop_neg: 849 op[0].negate = !op[0].negate; 850 this->result = op[0]; 851 break; 852 case ir_unop_abs: 853 op[0].abs = true; 854 this->result = op[0]; 855 break; 856 case ir_unop_sign: 857 temp = fs_reg(this, ir->type); 858 859 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 860 861 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 862 inst->conditional_mod = BRW_CONDITIONAL_G; 863 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 864 inst->predicated = true; 865 866 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 867 inst->conditional_mod = BRW_CONDITIONAL_L; 868 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 869 inst->predicated = true; 870 871 break; 872 case ir_unop_rcp: 873 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0])); 874 break; 875 876 
case ir_unop_exp2: 877 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0])); 878 break; 879 case ir_unop_log2: 880 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0])); 881 break; 882 case ir_unop_exp: 883 case ir_unop_log: 884 assert(!"not reached: should be handled by ir_explog_to_explog2"); 885 break; 886 case ir_unop_sin: 887 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0])); 888 break; 889 case ir_unop_cos: 890 emit(fs_inst(FS_OPCODE_COS, this->result, op[0])); 891 break; 892 893 case ir_unop_dFdx: 894 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 895 break; 896 case ir_unop_dFdy: 897 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 898 break; 899 900 case ir_binop_add: 901 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 902 break; 903 case ir_binop_sub: 904 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 905 break; 906 907 case ir_binop_mul: 908 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 909 break; 910 case ir_binop_div: 911 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 912 break; 913 case ir_binop_mod: 914 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 915 break; 916 917 case ir_binop_less: 918 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 919 inst->conditional_mod = BRW_CONDITIONAL_L; 920 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 921 break; 922 case ir_binop_greater: 923 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 924 inst->conditional_mod = BRW_CONDITIONAL_G; 925 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 926 break; 927 case ir_binop_lequal: 928 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 929 inst->conditional_mod = BRW_CONDITIONAL_LE; 930 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 931 break; 932 case ir_binop_gequal: 933 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 934 inst->conditional_mod = 
BRW_CONDITIONAL_GE; 935 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 936 break; 937 case ir_binop_equal: 938 case ir_binop_all_equal: /* same as nequal for scalars */ 939 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 940 inst->conditional_mod = BRW_CONDITIONAL_Z; 941 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 942 break; 943 case ir_binop_nequal: 944 case ir_binop_any_nequal: /* same as nequal for scalars */ 945 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 946 inst->conditional_mod = BRW_CONDITIONAL_NZ; 947 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 948 break; 949 950 case ir_binop_logic_xor: 951 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 952 break; 953 954 case ir_binop_logic_or: 955 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 956 break; 957 958 case ir_binop_logic_and: 959 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 960 break; 961 962 case ir_binop_dot: 963 case ir_binop_cross: 964 case ir_unop_any: 965 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 966 break; 967 968 case ir_unop_noise: 969 assert(!"not reached: should be handled by lower_noise"); 970 break; 971 972 case ir_unop_sqrt: 973 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0])); 974 break; 975 976 case ir_unop_rsq: 977 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0])); 978 break; 979 980 case ir_unop_i2f: 981 case ir_unop_b2f: 982 case ir_unop_b2i: 983 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 984 break; 985 case ir_unop_f2i: 986 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 987 break; 988 case ir_unop_f2b: 989 case ir_unop_i2b: 990 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 991 inst->conditional_mod = BRW_CONDITIONAL_NZ; 992 993 case ir_unop_trunc: 994 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 995 break; 996 case ir_unop_ceil: 997 op[0].negate = 
~op[0].negate; 998 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 999 this->result.negate = true; 1000 break; 1001 case ir_unop_floor: 1002 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1003 break; 1004 case ir_unop_fract: 1005 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 1006 break; 1007 1008 case ir_binop_min: 1009 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1010 inst->conditional_mod = BRW_CONDITIONAL_L; 1011 1012 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1013 inst->predicated = true; 1014 break; 1015 case ir_binop_max: 1016 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1017 inst->conditional_mod = BRW_CONDITIONAL_G; 1018 1019 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1020 inst->predicated = true; 1021 break; 1022 1023 case ir_binop_pow: 1024 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1])); 1025 break; 1026 1027 case ir_unop_bit_not: 1028 case ir_unop_u2f: 1029 case ir_binop_lshift: 1030 case ir_binop_rshift: 1031 case ir_binop_bit_and: 1032 case ir_binop_bit_xor: 1033 case ir_binop_bit_or: 1034 assert(!"GLSL 1.30 features unsupported"); 1035 break; 1036 } 1037} 1038 1039void 1040fs_visitor::visit(ir_assignment *ir) 1041{ 1042 struct fs_reg l, r; 1043 int i; 1044 int write_mask; 1045 fs_inst *inst; 1046 1047 /* FINISHME: arrays on the lhs */ 1048 ir->lhs->accept(this); 1049 l = this->result; 1050 1051 ir->rhs->accept(this); 1052 r = this->result; 1053 1054 /* FINISHME: This should really set to the correct maximal writemask for each 1055 * FINISHME: component written (in the loops below). This case can only 1056 * FINISHME: occur for matrices, arrays, and structures. 
1057 */ 1058 if (ir->write_mask == 0) { 1059 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 1060 write_mask = WRITEMASK_XYZW; 1061 } else { 1062 assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar()); 1063 write_mask = ir->write_mask; 1064 } 1065 1066 assert(l.file != BAD_FILE); 1067 assert(r.file != BAD_FILE); 1068 1069 if (ir->condition) { 1070 /* Get the condition bool into the predicate. */ 1071 ir->condition->accept(this); 1072 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0))); 1073 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1074 } 1075 1076 for (i = 0; i < type_size(ir->lhs->type); i++) { 1077 if (i >= 4 || (write_mask & (1 << i))) { 1078 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1079 if (ir->condition) 1080 inst->predicated = true; 1081 r.reg_offset++; 1082 } 1083 l.reg_offset++; 1084 } 1085} 1086 1087void 1088fs_visitor::visit(ir_texture *ir) 1089{ 1090 int base_mrf = 2; 1091 fs_inst *inst = NULL; 1092 unsigned int mlen = 0; 1093 1094 ir->coordinate->accept(this); 1095 fs_reg coordinate = this->result; 1096 1097 if (ir->projector) { 1098 fs_reg inv_proj = fs_reg(this, glsl_type::float_type); 1099 1100 ir->projector->accept(this); 1101 emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result)); 1102 1103 fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type); 1104 for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1105 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj)); 1106 coordinate.reg_offset++; 1107 proj_coordinate.reg_offset++; 1108 } 1109 proj_coordinate.reg_offset = 0; 1110 1111 coordinate = proj_coordinate; 1112 } 1113 1114 for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { 1115 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate)); 1116 coordinate.reg_offset++; 1117 } 1118 1119 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. 
*/
   /* Tail of the texture-sample setup: finish filling the message payload
    * (MRFs starting at base_mrf) and emit the sampler instruction.
    */
   if (intel->gen < 5)
      mlen = 3;

   if (ir->shadow_comparitor) {
      /* For shadow comparisons, we have to supply u,v,r. */
      mlen = 3;

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   /* Do we ever want to handle writemasking on texture samples?  Is it
    * performance relevant?
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      /* The LOD bias is appended as the last component of the payload. */
      ir->lod_info.bias->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      /* Explicit LOD likewise goes at the end of the payload. */
      ir->lod_info.lod->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   /* Map the GLSL sampler uniform to the actual hardware sampler unit. */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
                                      ctx->Shader.CurrentProgram,
                                      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;
   inst->mlen = mlen;
}

/** Expands a swizzle into a sequence of per-channel MOVs into a new reg. */
void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      /* Vector components live in consecutive reg_offsets, so selecting a
       * source channel is just an offset from the value's base register.
       */
      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

void
fs_visitor::visit(ir_discard *ir)
{
   /* Conditional discard is not handled yet; any condition must have been
    * lowered away before reaching this backend.
    */
   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD));
}

/** Loads each component of a constant into a freshly allocated register. */
void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_INT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_BOOL:
         /* Booleans are stored as integer 0/1. */
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code.
    */
   ir->condition->accept(this);
   /* MOV to null with conditional_mod sets the flag register from the
    * condition's value, which the predicated IF then consumes.
    */
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         /* Initialize the counter from the loop's "from" expression. */
         emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   /* Start a safety counter.  If the user messed up their loop
    * counting, we don't want to hang the GPU.
    */
   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      /* Compare the counter against the loop bound and break when the
       * loop's exit condition is met.
       */
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
                                   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;
      case ir_binop_nequal:
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      case ir_binop_gequal:
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         break;
      case ir_binop_lequal:
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         break;
      case ir_binop_greater:
         inst->conditional_mod = BRW_CONDITIONAL_G;
         break;
      case ir_binop_less:
         inst->conditional_mod = BRW_CONDITIONAL_L;
         break;
      default:
         assert(!"not reached: unknown loop condition");
         this->fail = true;
         break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      fs_inst *inst;

      this->base_ir = ir;
      ir->accept(this);

      /* Check the maximum loop iters counter.
       * NOTE(review): this decrement/break pair is emitted after every body
       * instruction, not once per iteration — presumably a placeholder to be
       * tightened later.
       */
      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
      inst->conditional_mod = BRW_CONDITIONAL_Z;

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   /* Calls should have been inlined away by this point. */
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   /* Signatures are only reached through ir_function, never visited directly. */
   assert(!"not reached");
   (void)ir;
}

/** Appends a copy of \p inst to the instruction list, tagging it with the
 * current annotation string and source IR for later debug output.
 */
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes.
 */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color. */
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 2),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 3),
                fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 4),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 5),
                fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
                        fs_reg(0),
                        fs_reg(0)));
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   /* Each attribute takes two registers (4 setup channels, half a reg each). */
   int regnr = location * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* The imm_v vectors spread the subspan X/Y offsets across the 8 pixels. */
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   this->delta_x = fs_reg(this, glsl_type::float_type);
   this->delta_y = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
   this->current_annotation = NULL;
}

/** Assembles the FB-write message payload (header, optional AA/depth
 * fields, color) and emits one FB_WRITE per color region.
 */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   int nr = 0;

   /* m0, m1 header */
   nr += 2;

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
         /* Pass through the payload depth.
          */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      if (this->frag_color || this->frag_data) {
         for (int i = 0; i < 4; i++) {
            emit(fs_inst(BRW_OPCODE_MOV,
                         fs_reg(MRF, color_mrf + i),
                         color));
            color.reg_offset++;
         }
      }

      /* gl_FragColor replicates to every target, so rewind; gl_FragData
       * advances to the next 4 components per target.
       */
      if (this->frag_color)
         color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      /* Only the last write ends the thread. */
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even with no color buffers, an EOT FB write must terminate the
       * thread.
       */
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
   }

   this->current_annotation = NULL;
}

void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;

   /* Header is 2 regs, g0 and g1 are the contents.  g0 will be implied
    * move, here's g1.
    */
   /* Copy g1 into m1 without masking/compression so the header is intact. */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
           brw_message_reg(1),
           brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                0, /* base MRF */
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
                inst->target,
                inst->mlen,
                0,
                eot);
}

/** Emits a linear interpolation, preferring the single-instruction PLN
 * when the hardware has it and the delta registers are suitably paired.
 */
void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   /* PLN needs delta_x/delta_y in adjacent registers; pre-gen6 it also
    * requires the pair to start on an even register number.
    */
   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

/** Lowers the FS math opcodes to a send to the hardware math unit. */
void
fs_visitor::generate_math(fs_inst *inst,
                          struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   /* POW is the only two-operand math function; its second argument is
    * passed through m3.
    */
   if (inst->opcode == FS_OPCODE_POW) {
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   brw_math(p, dst,
            op,
            inst->saturate ?
BRW_MATH_SATURATE_SATURATE : 1684 BRW_MATH_SATURATE_NONE, 1685 2, src[0], 1686 BRW_MATH_DATA_VECTOR, 1687 BRW_MATH_PRECISION_FULL); 1688} 1689 1690void 1691fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1692{ 1693 int msg_type = -1; 1694 int rlen = 4; 1695 1696 if (intel->gen == 5) { 1697 switch (inst->opcode) { 1698 case FS_OPCODE_TEX: 1699 if (inst->shadow_compare) { 1700 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; 1701 } else { 1702 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; 1703 } 1704 break; 1705 case FS_OPCODE_TXB: 1706 if (inst->shadow_compare) { 1707 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; 1708 } else { 1709 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; 1710 } 1711 break; 1712 } 1713 } else { 1714 switch (inst->opcode) { 1715 case FS_OPCODE_TEX: 1716 /* Note that G45 and older determines shadow compare and dispatch width 1717 * from message length for most messages. 1718 */ 1719 if (inst->shadow_compare) { 1720 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; 1721 } else { 1722 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; 1723 } 1724 case FS_OPCODE_TXB: 1725 if (inst->shadow_compare) { 1726 assert(!"FINISHME: shadow compare with bias."); 1727 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1728 } else { 1729 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1730 rlen = 8; 1731 } 1732 break; 1733 } 1734 } 1735 assert(msg_type != -1); 1736 1737 /* g0 header. 
*/ 1738 src.nr--; 1739 1740 brw_SAMPLE(p, 1741 retype(dst, BRW_REGISTER_TYPE_UW), 1742 src.nr, 1743 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 1744 SURF_INDEX_TEXTURE(inst->sampler), 1745 inst->sampler, 1746 WRITEMASK_XYZW, 1747 msg_type, 1748 rlen, 1749 inst->mlen + 1, 1750 0, 1751 1, 1752 BRW_SAMPLER_SIMD_MODE_SIMD8); 1753} 1754 1755 1756/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 1757 * looking like: 1758 * 1759 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 1760 * 1761 * and we're trying to produce: 1762 * 1763 * DDX DDY 1764 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 1765 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 1766 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 1767 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 1768 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 1769 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 1770 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 1771 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 1772 * 1773 * and add another set of two more subspans if in 16-pixel dispatch mode. 1774 * 1775 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 1776 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 1777 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 1778 * between each other. 
We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* tr (suboffset 1) minus tl (suboffset 0), replicated across each pair. */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* tl (suboffset 0) minus bl (suboffset 2), replicated across the subspan
    * (the simplified form described in the comment above).
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/** Implements discard by ANDing the pixel-enable bits in g0 with the
 * inverted execution mask, so discarded pixels stop being written.
 */
void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, c->emit_mask_reg, g0);
   brw_pop_insn_state(p);
}

/* Maps a virtual GRF number directly to a hardware register after the
 * payload/header registers.  reg 0 means "no register assigned".
 */
static void
trivial_assign_reg(int header_size, fs_reg *reg)
{
   if (reg->file == GRF && reg->reg != 0) {
      reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
      reg->reg = 0;
   }
}

/** Lays out the push-constant (CURBE) registers and rewrites UNIFORM
 * sources to the fixed hardware registers they land in.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
                           c->prog_data.curb_read_length) & 1) {
      /* Align the start of the interpolation coefficients so that we can use
       * the PLN instruction.
       */
      c->prog_data.first_curbe_grf++;
   }

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
            /* 8 constants per register; pick the register and subregister. */
            int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
            struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
                                                  constant_nr / 8,
                                                  constant_nr % 8);

            inst->src[i].file = FIXED_HW_REG;
            inst->src[i].fixed_hw_reg = brw_reg;
         }
      }
   }
}

/** Lays out the URB setup registers and patches LINTERP's interpolation
 * coefficient source (src[2]) to point at the real setup register.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      if (i != FRAG_ATTRIB_WPOS &&
          !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
         continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
         continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* The provisional reg number from interp_reg() encodes location * 2
       * plus the half-register bit; translate to the assigned slot.
       */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
                                      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}

/** Assigns hardware registers to all virtual GRFs (no allocator yet —
 * each virtual reg gets its own hardware reg).  src[2] is skipped because
 * only LINTERP uses it, and that source is already FIXED_HW_REG.
 */
void
fs_visitor::assign_regs()
{
   int header_size = this->first_non_payload_grf;
   int last_grf = 0;

   /* FINISHME: trivial assignment of register numbers */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      trivial_assign_reg(header_size, &inst->dst);
      trivial_assign_reg(header_size, &inst->src[0]);
      trivial_assign_reg(header_size, &inst->src[1]);

      last_grf = MAX2(last_grf, inst->dst.hw_reg);
      last_grf = MAX2(last_grf, inst->src[0].hw_reg);
      last_grf = MAX2(last_grf, inst->src[1].hw_reg);
   }

   this->grf_used = last_grf + 1;
}

/* Translates an fs_reg into the brw_reg the EU emitter wants. */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      brw_reg = brw_vec8_reg(reg->file,
                             reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(reg->imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(reg->imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(reg->imm.u);
         break;
      default:
         assert(!"not reached");
         break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused.
       */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      /* UNIFORM sources must have been rewritten by assign_curb_setup(). */
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}

/** Walks the FS IR instruction list and emits native gen instructions,
 * tracking if/loop nesting so ELSE/ENDIF/WHILE can be patched, and
 * recording per-native-instruction annotations for debug disassembly.
 */
void
fs_visitor::generate_code()
{
   unsigned int annotation_len = 0;
   int last_native_inst = 0;
   struct brw_instruction *if_stack[16], *loop_stack[16];
   int if_stack_depth = 0, loop_stack_depth = 0;
   /* Number of IFs open inside the current loop level, needed by
    * BREAK/CONT to know how many scopes they pop.
    */
   int if_depth_in_loop[16];

   if_depth_in_loop[loop_stack_depth] = 0;

   memset(&if_stack, 0, sizeof(if_stack));
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();
      struct brw_reg src[3], dst;

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = brw_reg_from_fs_reg(&inst->src[i]);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, dst, src[0]);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDD:
         brw_RNDD(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDZ:
         brw_RNDZ(p, dst, src[0]);
         break;

      case BRW_OPCODE_AND:
         brw_AND(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_OR:
         brw_OR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_XOR:
         brw_XOR(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_CMP:
         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
         break;
      case BRW_OPCODE_SEL:
         brw_SEL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_IF:
         assert(if_stack_depth < 16);
         if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
         if_depth_in_loop[loop_stack_depth]++;
         if_stack_depth++;
         break;
      case BRW_OPCODE_ELSE:
         /* brw_ELSE patches the IF and returns the new jump target. */
         if_stack[if_stack_depth - 1] =
            brw_ELSE(p, if_stack[if_stack_depth - 1]);
         break;
      case BRW_OPCODE_ENDIF:
         if_stack_depth--;
         brw_ENDIF(p , if_stack[if_stack_depth]);
         if_depth_in_loop[loop_stack_depth]--;
         break;

      case BRW_OPCODE_DO:
         loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
         if_depth_in_loop[loop_stack_depth] = 0;
         break;

      case BRW_OPCODE_BREAK:
         brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
      case BRW_OPCODE_CONTINUE:
         brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;

      case BRW_OPCODE_WHILE: {
         struct brw_instruction *inst0, *inst1;
         /* Jump distances are counted in 64-bit units on gen5 (br = 2). */
         GLuint br = 1;

         if (intel->gen == 5)
            br = 2;

         assert(loop_stack_depth > 0);
         loop_stack_depth--;
         inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
         /* patch all the BREAK/CONT instructions from last BGNLOOP */
         while (inst0 > loop_stack[loop_stack_depth]) {
            inst0--;
            if (inst0->header.opcode == BRW_OPCODE_BREAK &&
                inst0->bits3.if_else.jump_count == 0) {
               inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
            }
            else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
                     inst0->bits3.if_else.jump_count == 0) {
               inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
            }
         }
      }
         break;

      case FS_OPCODE_RCP:
      case FS_OPCODE_RSQ:
      case FS_OPCODE_SQRT:
      case FS_OPCODE_EXP2:
      case FS_OPCODE_LOG2:
      case FS_OPCODE_POW:
      case FS_OPCODE_SIN:
      case FS_OPCODE_COS:
         generate_math(inst, dst, src);
         break;
      case FS_OPCODE_LINTERP:
         generate_linterp(inst, dst, src);
         break;
      case FS_OPCODE_TEX:
      case FS_OPCODE_TXB:
      case FS_OPCODE_TXL:
         generate_tex(inst, dst, src[0]);
         break;
      case FS_OPCODE_DISCARD:
         generate_discard(inst);
         break;
      case FS_OPCODE_DDX:
         generate_ddx(inst, dst, src[0]);
         break;
      case FS_OPCODE_DDY:
         generate_ddy(inst, dst, src[0]);
         break;
      case FS_OPCODE_FB_WRITE:
         generate_fb_write(inst);
         break;
      default:
         if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
            _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
                          brw_opcodes[inst->opcode].name);
         } else {
            _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
         }
         this->fail = true;
      }

      /* Grow the per-native-instruction annotation arrays as needed. */
      if (annotation_len < p->nr_insn) {
         annotation_len *= 2;
         if (annotation_len < 16)
            annotation_len = 16;

         this->annotation_string = talloc_realloc(this->mem_ctx,
                                                  annotation_string,
                                                  const char *,
                                                  annotation_len);
         this->annotation_ir = talloc_realloc(this->mem_ctx,
                                              annotation_ir,
                                              ir_instruction *,
                                              annotation_len);
      }

      /* Tag every native instruction produced for this FS IR instruction. */
      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
         this->annotation_string[i] = inst->annotation;
         this->annotation_ir[i] = inst->ir;
      }
      last_native_inst = p->nr_insn;
   }
}

/** Entry point for the new GLSL-IR-based FS backend: finds the linked
 * fragment shader, runs the visitor over it, assigns registers, and
 * generates native code into the compile's brw_compile.
 */
GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   struct brw_shader *shader = NULL;
   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;

   if (!prog)
      return GL_FALSE;

   if (!using_new_fs)
      return GL_FALSE;

   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
         shader = (struct brw_shader *)prog->_LinkedShaders[i];
         break;
      }
   }
   if (!shader)
      return GL_FALSE;

   /* We
always use 8-wide mode, at least for now.  For one, flow
    * control only works in 8-wide.  Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      /* Debug path: replace the shader with solid magenta. */
      v.emit_dummy_fs();
   } else {
      v.emit_interpolation_setup();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         v.base_ir = ir;
         ir->accept(&v);
      }

      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();
      v.assign_regs();
   }

   v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc.
                     */

   if (v.fail)
      return GL_FALSE;

   if (INTEL_DEBUG & DEBUG_WM) {
      /* Print each annotation (IR and string) once, above the first native
       * instruction it applies to, then the disassembly.
       */
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
         if (last_annotation_ir != v.annotation_ir[i]) {
            last_annotation_ir = v.annotation_ir[i];
            if (last_annotation_ir) {
               printf("   ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != v.annotation_string[i]) {
            last_annotation_string = v.annotation_string[i];
            if (last_annotation_string)
               printf("   %s\n", last_annotation_string);
         }
         brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}