/* brw_fs.cpp — revision 9ac910cfcddf1b6e7c520261371e78fc9bcbddcf */
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "talloc.h"
}
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"

/** Register files an fs_reg can live in.  The first four values alias the
 * hardware register-file encodings so they can be passed through unchanged;
 * the rest are backend-virtual files resolved before code generation.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE
};

/** Backend-virtual opcodes.  Numbered from 256 so they cannot collide with
 * the hardware BRW_OPCODE_* encodings.
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};

/* -1 = not yet checked; set from the INTEL_NEW_FS environment variable the
 * first time brw_link_shader() runs.
 */
static int using_new_fs = -1;

static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);

/** Driver hook: allocate a brw_shader wrapping the core gl_shader.
 *
 * NOTE(review): on allocation failure this returns &NULL->base rather than
 * an explicit NULL -- callers appear to assume success.
 */
struct gl_shader *
brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = talloc_zero(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   return &shader->base;
}

/** Driver hook: allocate a brw_shader_program wrapping gl_shader_program. */
struct gl_shader_program *
brw_new_shader_program(GLcontext *ctx, GLuint name)
{
   struct brw_shader_program *prog;
   prog = talloc_zero(NULL, struct brw_shader_program);
   if (prog) {
      prog->base.Name = name;
      _mesa_init_shader_program(ctx, &prog->base);
   }
   return &prog->base;
}

/** Driver hook: compile a shader.  Defers entirely to the core Mesa IR
 * compiler; the backend-specific work happens at link time.
 */
GLboolean
brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
{
   if (!_mesa_ir_compile_shader(ctx, shader))
      return GL_FALSE;

   return GL_TRUE;
}

/** Driver hook: link a shader program.
 *
 * When INTEL_NEW_FS is set, each linked fragment shader's IR is cloned and
 * run through the lowering passes this backend requires (matrix ops to
 * vector ops, mod/div/sub lowering, channel expressions, vector splitting,
 * jump lowering, noise lowering, variable-index lowering) until no pass
 * reports further progress.  The lowered IR is reparented onto the shader
 * and the temporary clone context freed.
 */
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
         void *mem_ctx = talloc_new(NULL);
         bool progress;

         if (shader->ir)
            talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

         /* One-shot lowerings: these only need to run once. */
         do_mat_op_to_vec(shader->ir);
         do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);

         /* Iterate the interacting passes to a fixed point. */
         do {
            progress = false;

            brw_do_channel_expressions(shader->ir);
            brw_do_vector_splitting(shader->ir);

            progress = do_lower_jumps(shader->ir, true, true,
                                      true, /* main return */
                                      false, /* continue */
                                      false /* loops */
                                      ) || progress;

            progress = do_common_optimization(shader->ir, true, 32) || progress;

            progress = lower_noise(shader->ir) || progress;
            progress =
               lower_variable_index_to_cond_assign(shader->ir,
                                                   GL_TRUE, /* input */
                                                   GL_TRUE, /* output */
                                                   GL_TRUE, /* temp */
                                                   GL_TRUE /* uniform */
                                                   ) || progress;
         } while (progress);

         validate_ir_tree(shader->ir);

         /* Move the lowered IR off the temporary clone context so it
          * survives the talloc_free below.
          */
         reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}

/** Returns the number of scalar slots a value of @type occupies in this
 * backend's virtual register space.  Samplers take none (baked in at link
 * time); aggregates are the sum of their members.
 */
static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return type->components();
   case GLSL_TYPE_ARRAY:
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up no register space, since they're baked in at
       * link time.
       */
      return 0;
   default:
      assert(!"not reached");
      return 0;
   }
}

/** A register operand (or destination) of an fs_inst, possibly still
 * virtual (GRF numbers assigned by the visitor, resolved at reg alloc).
 */
class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Shared field initialization for all constructors. */
   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg Immediate value constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** Abstract register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the abstract register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   bool negate;
   bool abs;
   struct brw_reg fixed_hw_reg;

   /** Value for file == BRW_IMMMEDIATE_FILE */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};

static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);

/** One backend instruction: a hardware or virtual opcode plus up to three
 * source operands and a destination, with predication/conditional state.
 */
class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Shared field initialization for all constructors. */
   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};

/** Walks a fragment shader's GLSL IR and emits fs_inst instructions,
 * then assigns registers and generates native code.
 */
class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      /* reg 0 is reserved (0 means "fixed hw reg" in fs_reg). */
      this->next_abstract_grf = 1;
      this->variable_ht = hash_table_ctor(0,
                                          hash_table_pointer_hash,
                                          hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   fs_reg *variable_storage(ir_variable *var);

   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void assign_urb_setup();
   void assign_regs();
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_fragcoord_interpolation(ir_variable *ir);
   void emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup();
   void emit_fb_writes();

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;
   int next_abstract_grf;
   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;

   int grf_used;

};

/** Fixed HW reg constructor.
*/ 503fs_reg::fs_reg(enum register_file file, int hw_reg) 504{ 505 init(); 506 this->file = file; 507 this->hw_reg = hw_reg; 508 this->type = BRW_REGISTER_TYPE_F; 509} 510 511int 512brw_type_for_base_type(const struct glsl_type *type) 513{ 514 switch (type->base_type) { 515 case GLSL_TYPE_FLOAT: 516 return BRW_REGISTER_TYPE_F; 517 case GLSL_TYPE_INT: 518 case GLSL_TYPE_BOOL: 519 return BRW_REGISTER_TYPE_D; 520 case GLSL_TYPE_UINT: 521 return BRW_REGISTER_TYPE_UD; 522 case GLSL_TYPE_ARRAY: 523 case GLSL_TYPE_STRUCT: 524 /* These should be overridden with the type of the member when 525 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 526 * way to trip up if we don't. 527 */ 528 return BRW_REGISTER_TYPE_UD; 529 default: 530 assert(!"not reached"); 531 return BRW_REGISTER_TYPE_F; 532 } 533} 534 535/** Automatic reg constructor. */ 536fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 537{ 538 init(); 539 540 this->file = GRF; 541 this->reg = v->next_abstract_grf; 542 this->reg_offset = 0; 543 v->next_abstract_grf += type_size(type); 544 this->type = brw_type_for_base_type(type); 545} 546 547fs_reg * 548fs_visitor::variable_storage(ir_variable *var) 549{ 550 return (fs_reg *)hash_table_find(this->variable_ht, var); 551} 552 553/* Our support for uniforms is piggy-backed on the struct 554 * gl_fragment_program, because that's where the values actually 555 * get stored, rather than in some global gl_shader_program uniform 556 * store. 
557 */ 558int 559fs_visitor::setup_uniform_values(int loc, const glsl_type *type) 560{ 561 unsigned int offset = 0; 562 float *vec_values; 563 564 if (type->is_matrix()) { 565 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 566 type->vector_elements, 567 1); 568 569 for (unsigned int i = 0; i < type->matrix_columns; i++) { 570 offset += setup_uniform_values(loc + offset, column); 571 } 572 573 return offset; 574 } 575 576 switch (type->base_type) { 577 case GLSL_TYPE_FLOAT: 578 case GLSL_TYPE_UINT: 579 case GLSL_TYPE_INT: 580 case GLSL_TYPE_BOOL: 581 vec_values = fp->Base.Parameters->ParameterValues[loc]; 582 for (unsigned int i = 0; i < type->vector_elements; i++) { 583 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 584 } 585 return 1; 586 587 case GLSL_TYPE_STRUCT: 588 for (unsigned int i = 0; i < type->length; i++) { 589 offset += setup_uniform_values(loc + offset, 590 type->fields.structure[i].type); 591 } 592 return offset; 593 594 case GLSL_TYPE_ARRAY: 595 for (unsigned int i = 0; i < type->length; i++) { 596 offset += setup_uniform_values(loc + offset, type->fields.array); 597 } 598 return offset; 599 600 case GLSL_TYPE_SAMPLER: 601 /* The sampler takes up a slot, but we don't use any values from it. 
*/ 602 return 1; 603 604 default: 605 assert(!"not reached"); 606 return 0; 607 } 608} 609 610void 611fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) 612{ 613 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 614 fs_reg wpos = *reg; 615 fs_reg neg_y = this->pixel_y; 616 neg_y.negate = true; 617 618 /* gl_FragCoord.x */ 619 if (ir->pixel_center_integer) { 620 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); 621 } else { 622 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); 623 } 624 wpos.reg_offset++; 625 626 /* gl_FragCoord.y */ 627 if (ir->origin_upper_left && ir->pixel_center_integer) { 628 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); 629 } else { 630 fs_reg pixel_y = this->pixel_y; 631 float offset = (ir->pixel_center_integer ? 0.0 : 0.5); 632 633 if (!ir->origin_upper_left) { 634 pixel_y.negate = true; 635 offset += c->key.drawable_height - 1.0; 636 } 637 638 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); 639 } 640 wpos.reg_offset++; 641 642 /* gl_FragCoord.z */ 643 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, 644 interp_reg(FRAG_ATTRIB_WPOS, 2))); 645 wpos.reg_offset++; 646 647 /* gl_FragCoord.w: Already set up in emit_interpolation */ 648 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); 649 650 hash_table_insert(this->variable_ht, reg, ir); 651} 652 653 654void 655fs_visitor::emit_general_interpolation(ir_variable *ir) 656{ 657 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type); 658 /* Interpolation is always in floating point regs. 
*/ 659 reg->type = BRW_REGISTER_TYPE_F; 660 fs_reg attr = *reg; 661 662 unsigned int array_elements; 663 const glsl_type *type; 664 665 if (ir->type->is_array()) { 666 array_elements = ir->type->length; 667 if (array_elements == 0) { 668 this->fail = true; 669 } 670 type = ir->type->fields.array; 671 } else { 672 array_elements = 1; 673 type = ir->type; 674 } 675 676 int location = ir->location; 677 for (unsigned int i = 0; i < array_elements; i++) { 678 for (unsigned int j = 0; j < type->matrix_columns; j++) { 679 if (!(fp->Base.InputsRead & BITFIELD64_BIT(location))) { 680 /* If there's no incoming setup data for this slot, don't 681 * emit interpolation for it (since it's not used, and 682 * we'd fall over later trying to find the setup data. 683 */ 684 attr.reg_offset += type->vector_elements; 685 continue; 686 } 687 688 for (unsigned int c = 0; c < type->vector_elements; c++) { 689 struct brw_reg interp = interp_reg(location, c); 690 emit(fs_inst(FS_OPCODE_LINTERP, 691 attr, 692 this->delta_x, 693 this->delta_y, 694 fs_reg(interp))); 695 attr.reg_offset++; 696 } 697 attr.reg_offset -= type->vector_elements; 698 699 for (unsigned int c = 0; c < type->vector_elements; c++) { 700 emit(fs_inst(BRW_OPCODE_MUL, 701 attr, 702 attr, 703 this->pixel_w)); 704 attr.reg_offset++; 705 } 706 location++; 707 } 708 } 709 710 hash_table_insert(this->variable_ht, reg, ir); 711} 712 713void 714fs_visitor::visit(ir_variable *ir) 715{ 716 fs_reg *reg = NULL; 717 718 if (variable_storage(ir)) 719 return; 720 721 if (strcmp(ir->name, "gl_FragColor") == 0) { 722 this->frag_color = ir; 723 } else if (strcmp(ir->name, "gl_FragData") == 0) { 724 this->frag_data = ir; 725 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 726 this->frag_depth = ir; 727 } 728 729 if (ir->mode == ir_var_in) { 730 if (!strcmp(ir->name, "gl_FragCoord")) { 731 emit_fragcoord_interpolation(ir); 732 return; 733 } else if (!strcmp(ir->name, "gl_FrontFacing")) { 734 reg = new(this->mem_ctx) fs_reg(this, 
ir->type); 735 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); 736 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives 737 * us front face 738 */ 739 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, 740 *reg, 741 fs_reg(r1_6ud), 742 fs_reg(1u << 31))); 743 inst->conditional_mod = BRW_CONDITIONAL_L; 744 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); 745 } else { 746 emit_general_interpolation(ir); 747 return; 748 } 749 } 750 751 if (ir->mode == ir_var_uniform) { 752 int param_index = c->prog_data.nr_params; 753 754 setup_uniform_values(ir->location, ir->type); 755 756 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 757 } 758 759 if (!reg) 760 reg = new(this->mem_ctx) fs_reg(this, ir->type); 761 762 hash_table_insert(this->variable_ht, reg, ir); 763} 764 765void 766fs_visitor::visit(ir_dereference_variable *ir) 767{ 768 fs_reg *reg = variable_storage(ir->var); 769 this->result = *reg; 770} 771 772void 773fs_visitor::visit(ir_dereference_record *ir) 774{ 775 const glsl_type *struct_type = ir->record->type; 776 777 ir->record->accept(this); 778 779 unsigned int offset = 0; 780 for (unsigned int i = 0; i < struct_type->length; i++) { 781 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 782 break; 783 offset += type_size(struct_type->fields.structure[i].type); 784 } 785 this->result.reg_offset += offset; 786 this->result.type = brw_type_for_base_type(ir->type); 787} 788 789void 790fs_visitor::visit(ir_dereference_array *ir) 791{ 792 ir_constant *index; 793 int element_size; 794 795 ir->array->accept(this); 796 index = ir->array_index->as_constant(); 797 798 element_size = type_size(ir->type); 799 this->result.type = brw_type_for_base_type(ir->type); 800 801 if (index) { 802 assert(this->result.file == UNIFORM || 803 (this->result.file == GRF && 804 this->result.reg != 0)); 805 this->result.reg_offset += index->value.i[0] * element_size; 806 } else { 807 assert(!"FINISHME: non-constant array 
element"); 808 } 809} 810 811void 812fs_visitor::visit(ir_expression *ir) 813{ 814 unsigned int operand; 815 fs_reg op[2], temp; 816 fs_reg result; 817 fs_inst *inst; 818 819 for (operand = 0; operand < ir->get_num_operands(); operand++) { 820 ir->operands[operand]->accept(this); 821 if (this->result.file == BAD_FILE) { 822 ir_print_visitor v; 823 printf("Failed to get tree for expression operand:\n"); 824 ir->operands[operand]->accept(&v); 825 this->fail = true; 826 } 827 op[operand] = this->result; 828 829 /* Matrix expression operands should have been broken down to vector 830 * operations already. 831 */ 832 assert(!ir->operands[operand]->type->is_matrix()); 833 /* And then those vector operands should have been broken down to scalar. 834 */ 835 assert(!ir->operands[operand]->type->is_vector()); 836 } 837 838 /* Storage for our result. If our result goes into an assignment, it will 839 * just get copy-propagated out, so no worries. 840 */ 841 this->result = fs_reg(this, ir->type); 842 843 switch (ir->operation) { 844 case ir_unop_logic_not: 845 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 846 break; 847 case ir_unop_neg: 848 op[0].negate = !op[0].negate; 849 this->result = op[0]; 850 break; 851 case ir_unop_abs: 852 op[0].abs = true; 853 this->result = op[0]; 854 break; 855 case ir_unop_sign: 856 temp = fs_reg(this, ir->type); 857 858 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 859 860 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 861 inst->conditional_mod = BRW_CONDITIONAL_G; 862 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 863 inst->predicated = true; 864 865 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 866 inst->conditional_mod = BRW_CONDITIONAL_L; 867 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 868 inst->predicated = true; 869 870 break; 871 case ir_unop_rcp: 872 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0])); 873 break; 874 875 
case ir_unop_exp2: 876 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0])); 877 break; 878 case ir_unop_log2: 879 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0])); 880 break; 881 case ir_unop_exp: 882 case ir_unop_log: 883 assert(!"not reached: should be handled by ir_explog_to_explog2"); 884 break; 885 case ir_unop_sin: 886 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0])); 887 break; 888 case ir_unop_cos: 889 emit(fs_inst(FS_OPCODE_COS, this->result, op[0])); 890 break; 891 892 case ir_unop_dFdx: 893 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 894 break; 895 case ir_unop_dFdy: 896 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 897 break; 898 899 case ir_binop_add: 900 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 901 break; 902 case ir_binop_sub: 903 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 904 break; 905 906 case ir_binop_mul: 907 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 908 break; 909 case ir_binop_div: 910 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 911 break; 912 case ir_binop_mod: 913 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 914 break; 915 916 case ir_binop_less: 917 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 918 inst->conditional_mod = BRW_CONDITIONAL_L; 919 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 920 break; 921 case ir_binop_greater: 922 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 923 inst->conditional_mod = BRW_CONDITIONAL_G; 924 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 925 break; 926 case ir_binop_lequal: 927 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 928 inst->conditional_mod = BRW_CONDITIONAL_LE; 929 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 930 break; 931 case ir_binop_gequal: 932 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 933 inst->conditional_mod = 
BRW_CONDITIONAL_GE; 934 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 935 break; 936 case ir_binop_equal: 937 case ir_binop_all_equal: /* same as nequal for scalars */ 938 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 939 inst->conditional_mod = BRW_CONDITIONAL_Z; 940 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 941 break; 942 case ir_binop_nequal: 943 case ir_binop_any_nequal: /* same as nequal for scalars */ 944 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 945 inst->conditional_mod = BRW_CONDITIONAL_NZ; 946 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 947 break; 948 949 case ir_binop_logic_xor: 950 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); 951 break; 952 953 case ir_binop_logic_or: 954 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 955 break; 956 957 case ir_binop_logic_and: 958 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 959 break; 960 961 case ir_binop_dot: 962 case ir_binop_cross: 963 case ir_unop_any: 964 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 965 break; 966 967 case ir_unop_noise: 968 assert(!"not reached: should be handled by lower_noise"); 969 break; 970 971 case ir_unop_sqrt: 972 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0])); 973 break; 974 975 case ir_unop_rsq: 976 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0])); 977 break; 978 979 case ir_unop_i2f: 980 case ir_unop_b2f: 981 case ir_unop_b2i: 982 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 983 break; 984 case ir_unop_f2i: 985 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 986 break; 987 case ir_unop_f2b: 988 case ir_unop_i2b: 989 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 990 inst->conditional_mod = BRW_CONDITIONAL_NZ; 991 992 case ir_unop_trunc: 993 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 994 break; 995 case ir_unop_ceil: 996 op[0].negate = 
~op[0].negate; 997 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 998 this->result.negate = true; 999 break; 1000 case ir_unop_floor: 1001 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 1002 break; 1003 case ir_unop_fract: 1004 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 1005 break; 1006 1007 case ir_binop_min: 1008 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1009 inst->conditional_mod = BRW_CONDITIONAL_L; 1010 1011 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1012 inst->predicated = true; 1013 break; 1014 case ir_binop_max: 1015 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 1016 inst->conditional_mod = BRW_CONDITIONAL_G; 1017 1018 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 1019 inst->predicated = true; 1020 break; 1021 1022 case ir_binop_pow: 1023 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1])); 1024 break; 1025 1026 case ir_unop_bit_not: 1027 case ir_unop_u2f: 1028 case ir_binop_lshift: 1029 case ir_binop_rshift: 1030 case ir_binop_bit_and: 1031 case ir_binop_bit_xor: 1032 case ir_binop_bit_or: 1033 assert(!"GLSL 1.30 features unsupported"); 1034 break; 1035 } 1036} 1037 1038void 1039fs_visitor::visit(ir_assignment *ir) 1040{ 1041 struct fs_reg l, r; 1042 int i; 1043 int write_mask; 1044 fs_inst *inst; 1045 1046 /* FINISHME: arrays on the lhs */ 1047 ir->lhs->accept(this); 1048 l = this->result; 1049 1050 ir->rhs->accept(this); 1051 r = this->result; 1052 1053 /* FINISHME: This should really set to the correct maximal writemask for each 1054 * FINISHME: component written (in the loops below). This case can only 1055 * FINISHME: occur for matrices, arrays, and structures. 
1056 */ 1057 if (ir->write_mask == 0) { 1058 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 1059 write_mask = WRITEMASK_XYZW; 1060 } else { 1061 assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar()); 1062 write_mask = ir->write_mask; 1063 } 1064 1065 assert(l.file != BAD_FILE); 1066 assert(r.file != BAD_FILE); 1067 1068 if (ir->condition) { 1069 /* Get the condition bool into the predicate. */ 1070 ir->condition->accept(this); 1071 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0))); 1072 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1073 } 1074 1075 for (i = 0; i < type_size(ir->lhs->type); i++) { 1076 if (i >= 4 || (write_mask & (1 << i))) { 1077 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 1078 if (ir->condition) 1079 inst->predicated = true; 1080 r.reg_offset++; 1081 } 1082 l.reg_offset++; 1083 } 1084} 1085 1086void 1087fs_visitor::visit(ir_texture *ir) 1088{ 1089 int base_mrf = 2; 1090 fs_inst *inst = NULL; 1091 unsigned int mlen = 0; 1092 1093 ir->coordinate->accept(this); 1094 fs_reg coordinate = this->result; 1095 1096 if (ir->projector) { 1097 fs_reg inv_proj = fs_reg(this, glsl_type::float_type); 1098 1099 ir->projector->accept(this); 1100 emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result)); 1101 1102 fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type); 1103 for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) { 1104 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj)); 1105 coordinate.reg_offset++; 1106 proj_coordinate.reg_offset++; 1107 } 1108 proj_coordinate.reg_offset = 0; 1109 1110 coordinate = proj_coordinate; 1111 } 1112 1113 for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { 1114 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate)); 1115 coordinate.reg_offset++; 1116 } 1117 1118 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. 
 */
   if (intel->gen < 5)
      mlen = 3;

   if (ir->shadow_comparitor) {
      /* For shadow comparisons, we have to supply u,v,r. */
      mlen = 3;

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   /* Do we ever want to handle writemasking on texture samples?  Is it
    * performance relevant?
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      /* Append the bias operand to the message payload. */
      ir->lod_info.bias->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      /* Append the explicit LOD operand to the message payload. */
      ir->lod_info.lod->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   /* Map the GLSL sampler uniform to the texture unit the app bound it to. */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
                                      ctx->Shader.CurrentProgram,
                                      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;
   inst->mlen = mlen;
}

/** Expands a swizzle into a sequence of per-channel MOVs from the source. */
void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      /* Select the source channel for this destination component. */
      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

/** Emits the fragment-kill instruction; conditional discard is unhandled. */
void
fs_visitor::visit(ir_discard *ir)
{
   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD));
}

/** Loads a constant's components into a fresh register with immediate MOVs. */
void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_INT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_BOOL:
         /* Booleans are stored as integer 0/1 values. */
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

/** Emits an if/else/endif using predicated IF on the condition's NZ flag. */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

/** Emits a DO/WHILE loop, its counter bookkeeping, and a GPU-hang guard. */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         /* Initialize the loop counter from the "from" expression. */
         emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   /* Start a safety counter.  If the user messed up their loop
    * counting, we don't want to hang the GPU.
    */
   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      /* Compare the counter against the loop bound and break when the
       * termination condition holds.
       */
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
                                   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;
      case ir_binop_nequal:
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      case ir_binop_gequal:
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         break;
      case ir_binop_lequal:
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         break;
      case ir_binop_greater:
         inst->conditional_mod = BRW_CONDITIONAL_G;
         break;
      case ir_binop_less:
         inst->conditional_mod = BRW_CONDITIONAL_L;
         break;
      default:
         assert(!"not reached: unknown loop condition");
         this->fail = true;
         break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      fs_inst *inst;

      this->base_ir = ir;
      ir->accept(this);

      /* Check the maximum loop iters counter.  Note this decrements the
       * safety counter after every body statement, not once per iteration.
       */
      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
      inst->conditional_mod = BRW_CONDITIONAL_Z;

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

/** Emits BREAK/CONTINUE for loop-jump statements. */
void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

/* Calls should all have been inlined before we get here. */
void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

/** Visits the body of main(); other functions are assumed already inlined. */
void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

/** Copies \p inst onto the instruction list, tagging it with the current
 * annotation and IR for later debug printing, and returns the stored copy.
 */
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes.
*/ 1436void 1437fs_visitor::emit_dummy_fs() 1438{ 1439 /* Everyone's favorite color. */ 1440 emit(fs_inst(BRW_OPCODE_MOV, 1441 fs_reg(MRF, 2), 1442 fs_reg(1.0f))); 1443 emit(fs_inst(BRW_OPCODE_MOV, 1444 fs_reg(MRF, 3), 1445 fs_reg(0.0f))); 1446 emit(fs_inst(BRW_OPCODE_MOV, 1447 fs_reg(MRF, 4), 1448 fs_reg(1.0f))); 1449 emit(fs_inst(BRW_OPCODE_MOV, 1450 fs_reg(MRF, 5), 1451 fs_reg(0.0f))); 1452 1453 fs_inst *write; 1454 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1455 fs_reg(0), 1456 fs_reg(0))); 1457} 1458 1459/* The register location here is relative to the start of the URB 1460 * data. It will get adjusted to be a real location before 1461 * generate_code() time. 1462 */ 1463struct brw_reg 1464fs_visitor::interp_reg(int location, int channel) 1465{ 1466 int regnr = location * 2 + channel / 2; 1467 int stride = (channel & 1) * 4; 1468 1469 return brw_vec1_grf(regnr, stride); 1470} 1471 1472/** Emits the interpolation for the varying inputs. */ 1473void 1474fs_visitor::emit_interpolation_setup() 1475{ 1476 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); 1477 1478 this->current_annotation = "compute pixel centers"; 1479 this->pixel_x = fs_reg(this, glsl_type::uint_type); 1480 this->pixel_y = fs_reg(this, glsl_type::uint_type); 1481 this->pixel_x.type = BRW_REGISTER_TYPE_UW; 1482 this->pixel_y.type = BRW_REGISTER_TYPE_UW; 1483 emit(fs_inst(BRW_OPCODE_ADD, 1484 this->pixel_x, 1485 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), 1486 fs_reg(brw_imm_v(0x10101010)))); 1487 emit(fs_inst(BRW_OPCODE_ADD, 1488 this->pixel_y, 1489 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), 1490 fs_reg(brw_imm_v(0x11001100)))); 1491 1492 this->current_annotation = "compute pixel deltas from v0"; 1493 this->delta_x = fs_reg(this, glsl_type::float_type); 1494 this->delta_y = fs_reg(this, glsl_type::float_type); 1495 emit(fs_inst(BRW_OPCODE_ADD, 1496 this->delta_x, 1497 this->pixel_x, 1498 fs_reg(negate(brw_vec1_grf(1, 0))))); 1499 emit(fs_inst(BRW_OPCODE_ADD, 1500 
this->delta_y, 1501 this->pixel_y, 1502 fs_reg(negate(brw_vec1_grf(1, 1))))); 1503 1504 this->current_annotation = "compute pos.w and 1/pos.w"; 1505 /* Compute wpos.w. It's always in our setup, since it's needed to 1506 * interpolate the other attributes. 1507 */ 1508 this->wpos_w = fs_reg(this, glsl_type::float_type); 1509 emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, 1510 interp_reg(FRAG_ATTRIB_WPOS, 3))); 1511 /* Compute the pixel 1/W value from wpos.w. */ 1512 this->pixel_w = fs_reg(this, glsl_type::float_type); 1513 emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w)); 1514 this->current_annotation = NULL; 1515} 1516 1517void 1518fs_visitor::emit_fb_writes() 1519{ 1520 this->current_annotation = "FB write header"; 1521 int nr = 0; 1522 1523 /* m0, m1 header */ 1524 nr += 2; 1525 1526 if (c->key.aa_dest_stencil_reg) { 1527 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1528 fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); 1529 } 1530 1531 /* Reserve space for color. It'll be filled in per MRT below. */ 1532 int color_mrf = nr; 1533 nr += 4; 1534 1535 if (c->key.source_depth_to_render_target) { 1536 if (c->key.computes_depth) { 1537 /* Hand over gl_FragDepth. */ 1538 assert(this->frag_depth); 1539 fs_reg depth = *(variable_storage(this->frag_depth)); 1540 1541 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); 1542 } else { 1543 /* Pass through the payload depth. 
*/ 1544 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1545 fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); 1546 } 1547 } 1548 1549 if (c->key.dest_depth_reg) { 1550 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), 1551 fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); 1552 } 1553 1554 fs_reg color = reg_undef; 1555 if (this->frag_color) 1556 color = *(variable_storage(this->frag_color)); 1557 else if (this->frag_data) 1558 color = *(variable_storage(this->frag_data)); 1559 1560 for (int target = 0; target < c->key.nr_color_regions; target++) { 1561 this->current_annotation = talloc_asprintf(this->mem_ctx, 1562 "FB write target %d", 1563 target); 1564 if (this->frag_color || this->frag_data) { 1565 for (int i = 0; i < 4; i++) { 1566 emit(fs_inst(BRW_OPCODE_MOV, 1567 fs_reg(MRF, color_mrf + i), 1568 color)); 1569 color.reg_offset++; 1570 } 1571 } 1572 1573 if (this->frag_color) 1574 color.reg_offset -= 4; 1575 1576 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1577 reg_undef, reg_undef)); 1578 inst->target = target; 1579 inst->mlen = nr; 1580 if (target == c->key.nr_color_regions - 1) 1581 inst->eot = true; 1582 } 1583 1584 if (c->key.nr_color_regions == 0) { 1585 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, 1586 reg_undef, reg_undef)); 1587 inst->mlen = nr; 1588 inst->eot = true; 1589 } 1590 1591 this->current_annotation = NULL; 1592} 1593 1594void 1595fs_visitor::generate_fb_write(fs_inst *inst) 1596{ 1597 GLboolean eot = inst->eot; 1598 1599 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied 1600 * move, here's g1. 
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
           brw_message_reg(1),
           brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                0, /* base MRF */
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
                inst->target,
                inst->mlen,
                0,
                eot);
}

/** Generates linear interpolation of a varying from its barycentric deltas,
 * using PLN when the hardware has it and the deltas are laid out for it,
 * falling back to a LINE+MAC pair otherwise.
 */
void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   /* PLN needs the two deltas in consecutive registers, and pre-gen6 it
    * also needs the pair to start on an even register number.
    */
   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

/** Generates a math-box send message for the FS_OPCODE_* math opcodes. */
void
fs_visitor::generate_math(fs_inst *inst,
                          struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   /* POW is the only two-operand function; its second operand goes in the
    * message register following the base operand.
    */
   if (inst->opcode == FS_OPCODE_POW) {
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
            BRW_MATH_SATURATE_NONE,
            2, src[0],
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}

/** Generates the sampler send message for FS_OPCODE_TEX/TXB/TXL. */
void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;

   if (intel->gen == 5) {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
         }
         break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         /* Note that G45 and older determines shadow compare and dispatch width
          * from message length for most messages.
          */
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
         }
         /* NOTE(review): no break here -- FS_OPCODE_TEX falls through into
          * the TXB case: a shadow-compare TEX hits the FINISHME assert
          * below, and a plain TEX ends up with the SAMPLE_BIAS msg_type and
          * rlen = 8, making the msg_type assignments above dead stores.
          * Verify against the pre-gen5 sampler message descriptors whether
          * the fallthrough is intentional before adding a break.
          */
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(!"FINISHME: shadow compare with bias.");
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            rlen = 8;
         }
         break;
      }
   }
   /* NOTE(review): FS_OPCODE_TXL is routed here by generate_code() but no
    * path above selects a message type for it, so it stops at this assert.
    */
   assert(msg_type != -1);

   /* g0 header.
We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Pair up each subspan's left/right samples via width-2 regions and
    * subtract to get the horizontal derivative (see the comment above).
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Subtract each subspan's bottom row from its top row; per the comment
    * above this produces the simplified (tl - bl) value for all four pixels.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/** Generates the discard sequence: clears discarded pixels' bits out of the
 * g0 pixel mask so later FB writes won't update them.
 */
void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, c->emit_mask_reg, g0);
   brw_pop_insn_state(p);
}

/* Maps a virtual GRF number to a hardware register past the payload, in
 * place.  Register 0 means "unused slot" and is left alone.
 */
static void
trivial_assign_reg(int header_size, fs_reg *reg)
{
   if (reg->file == GRF && reg->reg != 0) {
      reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
      reg->reg = 0;
   }
}

/** Lays out push constants and rewrites UNIFORM file references to the
 * fixed hardware registers the CURB loads them into.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
                           c->prog_data.curb_read_length) & 1) {
      /* Align the start of the interpolation coefficients so that we can use
       * the PLN instruction.
       */
      c->prog_data.first_curbe_grf++;
   }

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
            int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
            struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
                                                  constant_nr / 8,
                                                  constant_nr % 8);

            inst->src[i].file = FIXED_HW_REG;
            inst->src[i].fixed_hw_reg = brw_reg;
         }
      }
   }
}

/** Lays out the incoming URB setup data and patches LINTERP instructions to
 * read their coefficients from the right registers.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      if (i != FRAG_ATTRIB_WPOS &&
          !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
         continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
         continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* interp_reg() encoded location * 2 (+ channel) in the placeholder. */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
                                      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}

/** Assigns hardware register numbers to all virtual GRFs and records the
 * total GRF count.  src[2] is not remapped here; by this point the only
 * three-source instruction, LINTERP, already has a FIXED_HW_REG src[2].
 */
void
fs_visitor::assign_regs()
{
   int header_size = this->first_non_payload_grf;
   int last_grf = 0;

   /* FINISHME: trivial assignment of register numbers */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      trivial_assign_reg(header_size, &inst->dst);
      trivial_assign_reg(header_size, &inst->src[0]);
      trivial_assign_reg(header_size, &inst->src[1]);

      last_grf = MAX2(last_grf, inst->dst.hw_reg);
      last_grf = MAX2(last_grf, inst->src[0].hw_reg);
      last_grf = MAX2(last_grf, inst->src[1].hw_reg);
   }

   this->grf_used = last_grf + 1;
}

/* Converts an fs_reg to the struct brw_reg the assembler emitter expects. */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      brw_reg = brw_vec8_reg(reg->file,
                             reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(reg->imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(reg->imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(reg->imm.u);
         break;
      default:
         /* NOTE(review): brw_reg is left uninitialized on this path in
          * non-assert builds.
          */
         assert(!"not reached");
         break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused.
*/ 1965 brw_reg = brw_null_reg(); 1966 break; 1967 case UNIFORM: 1968 assert(!"not reached"); 1969 brw_reg = brw_null_reg(); 1970 break; 1971 } 1972 if (reg->abs) 1973 brw_reg = brw_abs(brw_reg); 1974 if (reg->negate) 1975 brw_reg = negate(brw_reg); 1976 1977 return brw_reg; 1978} 1979 1980void 1981fs_visitor::generate_code() 1982{ 1983 unsigned int annotation_len = 0; 1984 int last_native_inst = 0; 1985 struct brw_instruction *if_stack[16], *loop_stack[16]; 1986 int if_stack_depth = 0, loop_stack_depth = 0; 1987 int if_depth_in_loop[16]; 1988 1989 if_depth_in_loop[loop_stack_depth] = 0; 1990 1991 memset(&if_stack, 0, sizeof(if_stack)); 1992 foreach_iter(exec_list_iterator, iter, this->instructions) { 1993 fs_inst *inst = (fs_inst *)iter.get(); 1994 struct brw_reg src[3], dst; 1995 1996 for (unsigned int i = 0; i < 3; i++) { 1997 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 1998 } 1999 dst = brw_reg_from_fs_reg(&inst->dst); 2000 2001 brw_set_conditionalmod(p, inst->conditional_mod); 2002 brw_set_predicate_control(p, inst->predicated); 2003 2004 switch (inst->opcode) { 2005 case BRW_OPCODE_MOV: 2006 brw_MOV(p, dst, src[0]); 2007 break; 2008 case BRW_OPCODE_ADD: 2009 brw_ADD(p, dst, src[0], src[1]); 2010 break; 2011 case BRW_OPCODE_MUL: 2012 brw_MUL(p, dst, src[0], src[1]); 2013 break; 2014 2015 case BRW_OPCODE_FRC: 2016 brw_FRC(p, dst, src[0]); 2017 break; 2018 case BRW_OPCODE_RNDD: 2019 brw_RNDD(p, dst, src[0]); 2020 break; 2021 case BRW_OPCODE_RNDZ: 2022 brw_RNDZ(p, dst, src[0]); 2023 break; 2024 2025 case BRW_OPCODE_AND: 2026 brw_AND(p, dst, src[0], src[1]); 2027 break; 2028 case BRW_OPCODE_OR: 2029 brw_OR(p, dst, src[0], src[1]); 2030 break; 2031 case BRW_OPCODE_XOR: 2032 brw_XOR(p, dst, src[0], src[1]); 2033 break; 2034 2035 case BRW_OPCODE_CMP: 2036 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 2037 break; 2038 case BRW_OPCODE_SEL: 2039 brw_SEL(p, dst, src[0], src[1]); 2040 break; 2041 2042 case BRW_OPCODE_IF: 2043 assert(if_stack_depth < 16); 2044 
if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 2045 if_depth_in_loop[loop_stack_depth]++; 2046 if_stack_depth++; 2047 break; 2048 case BRW_OPCODE_ELSE: 2049 if_stack[if_stack_depth - 1] = 2050 brw_ELSE(p, if_stack[if_stack_depth - 1]); 2051 break; 2052 case BRW_OPCODE_ENDIF: 2053 if_stack_depth--; 2054 brw_ENDIF(p , if_stack[if_stack_depth]); 2055 if_depth_in_loop[loop_stack_depth]--; 2056 break; 2057 2058 case BRW_OPCODE_DO: 2059 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 2060 if_depth_in_loop[loop_stack_depth] = 0; 2061 break; 2062 2063 case BRW_OPCODE_BREAK: 2064 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 2065 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2066 break; 2067 case BRW_OPCODE_CONTINUE: 2068 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 2069 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2070 break; 2071 2072 case BRW_OPCODE_WHILE: { 2073 struct brw_instruction *inst0, *inst1; 2074 GLuint br = 1; 2075 2076 if (intel->gen == 5) 2077 br = 2; 2078 2079 assert(loop_stack_depth > 0); 2080 loop_stack_depth--; 2081 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 2082 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 2083 while (inst0 > loop_stack[loop_stack_depth]) { 2084 inst0--; 2085 if (inst0->header.opcode == BRW_OPCODE_BREAK && 2086 inst0->bits3.if_else.jump_count == 0) { 2087 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 2088 } 2089 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 2090 inst0->bits3.if_else.jump_count == 0) { 2091 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 2092 } 2093 } 2094 } 2095 break; 2096 2097 case FS_OPCODE_RCP: 2098 case FS_OPCODE_RSQ: 2099 case FS_OPCODE_SQRT: 2100 case FS_OPCODE_EXP2: 2101 case FS_OPCODE_LOG2: 2102 case FS_OPCODE_POW: 2103 case FS_OPCODE_SIN: 2104 case FS_OPCODE_COS: 2105 generate_math(inst, dst, src); 2106 break; 2107 case FS_OPCODE_LINTERP: 2108 generate_linterp(inst, dst, src); 2109 break; 2110 case 
FS_OPCODE_TEX: 2111 case FS_OPCODE_TXB: 2112 case FS_OPCODE_TXL: 2113 generate_tex(inst, dst, src[0]); 2114 break; 2115 case FS_OPCODE_DISCARD: 2116 generate_discard(inst); 2117 break; 2118 case FS_OPCODE_DDX: 2119 generate_ddx(inst, dst, src[0]); 2120 break; 2121 case FS_OPCODE_DDY: 2122 generate_ddy(inst, dst, src[0]); 2123 break; 2124 case FS_OPCODE_FB_WRITE: 2125 generate_fb_write(inst); 2126 break; 2127 default: 2128 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 2129 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 2130 brw_opcodes[inst->opcode].name); 2131 } else { 2132 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 2133 } 2134 this->fail = true; 2135 } 2136 2137 if (annotation_len < p->nr_insn) { 2138 annotation_len *= 2; 2139 if (annotation_len < 16) 2140 annotation_len = 16; 2141 2142 this->annotation_string = talloc_realloc(this->mem_ctx, 2143 annotation_string, 2144 const char *, 2145 annotation_len); 2146 this->annotation_ir = talloc_realloc(this->mem_ctx, 2147 annotation_ir, 2148 ir_instruction *, 2149 annotation_len); 2150 } 2151 2152 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 2153 this->annotation_string[i] = inst->annotation; 2154 this->annotation_ir[i] = inst->ir; 2155 } 2156 last_native_inst = p->nr_insn; 2157 } 2158} 2159 2160GLboolean 2161brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 2162{ 2163 struct brw_compile *p = &c->func; 2164 struct intel_context *intel = &brw->intel; 2165 GLcontext *ctx = &intel->ctx; 2166 struct brw_shader *shader = NULL; 2167 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 2168 2169 if (!prog) 2170 return GL_FALSE; 2171 2172 if (!using_new_fs) 2173 return GL_FALSE; 2174 2175 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 2176 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 2177 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 2178 break; 2179 } 2180 } 2181 if (!shader) 2182 return GL_FALSE; 2183 2184 /* We 
always use 8-wide mode, at least for now.  For one, flow
    * control only works in 8-wide.  Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      /* Bringup path: magenta shader instead of the real program. */
      v.emit_dummy_fs();
   } else {
      v.emit_interpolation_setup();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         v.base_ir = ir;
         ir->accept(&v);
      }

      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();
      v.assign_regs();
   }

   v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   if (INTEL_DEBUG & DEBUG_WM) {
      /* Print the disassembly, interleaving each annotation/IR statement
       * once, before the first native instruction it produced.
       */
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
         if (last_annotation_ir != v.annotation_ir[i]) {
            last_annotation_ir = v.annotation_ir[i];
            if (last_annotation_ir) {
               printf("   ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != v.annotation_string[i]) {
            last_annotation_string = v.annotation_string[i];
            if (last_annotation_string)
               printf("   %s\n", last_annotation_string);
         }
         brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}