brw_fs.cpp revision 57edd7c5c116926325e3a86cef618bfd1b5881c1
1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28extern "C" { 29 30#include <sys/types.h> 31 32#include "main/macros.h" 33#include "main/shaderobj.h" 34#include "program/prog_parameter.h" 35#include "program/prog_print.h" 36#include "program/prog_optimize.h" 37#include "program/hash_table.h" 38#include "brw_context.h" 39#include "brw_eu.h" 40#include "brw_wm.h" 41#include "talloc.h" 42} 43#include "../glsl/glsl_types.h" 44#include "../glsl/ir_optimization.h" 45#include "../glsl/ir_print_visitor.h" 46 47enum register_file { 48 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 49 GRF = BRW_GENERAL_REGISTER_FILE, 50 MRF = BRW_MESSAGE_REGISTER_FILE, 51 IMM = BRW_IMMEDIATE_VALUE, 52 FIXED_HW_REG, /* a struct brw_reg */ 53 UNIFORM, /* prog_data->params[hw_reg] */ 54 BAD_FILE 55}; 56 57enum fs_opcodes { 58 FS_OPCODE_FB_WRITE = 256, 59 FS_OPCODE_RCP, 60 FS_OPCODE_RSQ, 61 FS_OPCODE_SQRT, 62 FS_OPCODE_EXP2, 63 FS_OPCODE_LOG2, 64 FS_OPCODE_POW, 65 FS_OPCODE_SIN, 66 FS_OPCODE_COS, 67 FS_OPCODE_DDX, 68 FS_OPCODE_DDY, 69 FS_OPCODE_LINTERP, 70 FS_OPCODE_TEX, 71 FS_OPCODE_TXB, 72 FS_OPCODE_TXL, 73 FS_OPCODE_DISCARD, 74}; 75 76static int using_new_fs = -1; 77 78struct gl_shader * 79brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) 80{ 81 struct brw_shader *shader; 82 83 shader = talloc_zero(NULL, struct brw_shader); 84 if (shader) { 85 shader->base.Type = type; 86 shader->base.Name = name; 87 _mesa_init_shader(ctx, &shader->base); 88 } 89 90 return &shader->base; 91} 92 93struct gl_shader_program * 94brw_new_shader_program(GLcontext *ctx, GLuint name) 95{ 96 struct brw_shader_program *prog; 97 prog = talloc_zero(NULL, struct brw_shader_program); 98 if (prog) { 99 prog->base.Name = name; 100 _mesa_init_shader_program(ctx, &prog->base); 101 } 102 return &prog->base; 103} 104 105GLboolean 106brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) 107{ 108 if (!_mesa_ir_compile_shader(ctx, shader)) 109 return GL_FALSE; 110 111 return GL_TRUE; 112} 113 114GLboolean 
115brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) 116{ 117 if (using_new_fs == -1) 118 using_new_fs = getenv("INTEL_NEW_FS") != NULL; 119 120 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { 121 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; 122 123 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) { 124 void *mem_ctx = talloc_new(NULL); 125 bool progress; 126 127 if (shader->ir) 128 talloc_free(shader->ir); 129 shader->ir = new(shader) exec_list; 130 clone_ir_list(mem_ctx, shader->ir, shader->base.ir); 131 132 do_mat_op_to_vec(shader->ir); 133 do_mod_to_fract(shader->ir); 134 do_div_to_mul_rcp(shader->ir); 135 do_sub_to_add_neg(shader->ir); 136 do_explog_to_explog2(shader->ir); 137 138 brw_do_channel_expressions(shader->ir); 139 brw_do_vector_splitting(shader->ir); 140 141 do { 142 progress = false; 143 144 progress = do_lower_jumps(shader->ir, true, true, 145 true, /* main return */ 146 false, /* continue */ 147 false /* loops */ 148 ) || progress; 149 150 progress = do_common_optimization(shader->ir, true, 32) || progress; 151 152 progress = lower_noise(shader->ir) || progress; 153 progress = 154 lower_variable_index_to_cond_assign(shader->ir, 155 GL_TRUE, /* input */ 156 GL_TRUE, /* output */ 157 GL_TRUE, /* temp */ 158 GL_TRUE /* uniform */ 159 ) || progress; 160 } while (progress); 161 162 validate_ir_tree(shader->ir); 163 164 reparent_ir(shader->ir, shader->ir); 165 talloc_free(mem_ctx); 166 } 167 } 168 169 if (!_mesa_ir_link_shader(ctx, prog)) 170 return GL_FALSE; 171 172 return GL_TRUE; 173} 174 175static int 176type_size(const struct glsl_type *type) 177{ 178 unsigned int size, i; 179 180 switch (type->base_type) { 181 case GLSL_TYPE_UINT: 182 case GLSL_TYPE_INT: 183 case GLSL_TYPE_FLOAT: 184 case GLSL_TYPE_BOOL: 185 return type->components(); 186 case GLSL_TYPE_ARRAY: 187 /* FINISHME: uniform/varying arrays. 
*/ 188 return type_size(type->fields.array) * type->length; 189 case GLSL_TYPE_STRUCT: 190 size = 0; 191 for (i = 0; i < type->length; i++) { 192 size += type_size(type->fields.structure[i].type); 193 } 194 return size; 195 case GLSL_TYPE_SAMPLER: 196 /* Samplers take up no register space, since they're baked in at 197 * link time. 198 */ 199 return 0; 200 default: 201 assert(!"not reached"); 202 return 0; 203 } 204} 205 206class fs_reg { 207public: 208 /* Callers of this talloc-based new need not call delete. It's 209 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 210 static void* operator new(size_t size, void *ctx) 211 { 212 void *node; 213 214 node = talloc_size(ctx, size); 215 assert(node != NULL); 216 217 return node; 218 } 219 220 void init() 221 { 222 this->reg = 0; 223 this->reg_offset = 0; 224 this->negate = 0; 225 this->abs = 0; 226 this->hw_reg = -1; 227 } 228 229 /** Generic unset register constructor. */ 230 fs_reg() 231 { 232 init(); 233 this->file = BAD_FILE; 234 } 235 236 /** Immediate value constructor. */ 237 fs_reg(float f) 238 { 239 init(); 240 this->file = IMM; 241 this->type = BRW_REGISTER_TYPE_F; 242 this->imm.f = f; 243 } 244 245 /** Immediate value constructor. */ 246 fs_reg(int32_t i) 247 { 248 init(); 249 this->file = IMM; 250 this->type = BRW_REGISTER_TYPE_D; 251 this->imm.i = i; 252 } 253 254 /** Immediate value constructor. */ 255 fs_reg(uint32_t u) 256 { 257 init(); 258 this->file = IMM; 259 this->type = BRW_REGISTER_TYPE_UD; 260 this->imm.u = u; 261 } 262 263 /** Fixed brw_reg Immediate value constructor. */ 264 fs_reg(struct brw_reg fixed_hw_reg) 265 { 266 init(); 267 this->file = FIXED_HW_REG; 268 this->fixed_hw_reg = fixed_hw_reg; 269 this->type = fixed_hw_reg.type; 270 } 271 272 fs_reg(enum register_file file, int hw_reg); 273 fs_reg(class fs_visitor *v, const struct glsl_type *type); 274 275 /** Register file: ARF, GRF, MRF, IMM. */ 276 enum register_file file; 277 /** Abstract register number. 
0 = fixed hw reg */ 278 int reg; 279 /** Offset within the abstract register. */ 280 int reg_offset; 281 /** HW register number. Generally unset until register allocation. */ 282 int hw_reg; 283 /** Register type. BRW_REGISTER_TYPE_* */ 284 int type; 285 bool negate; 286 bool abs; 287 struct brw_reg fixed_hw_reg; 288 289 /** Value for file == BRW_IMMMEDIATE_FILE */ 290 union { 291 int32_t i; 292 uint32_t u; 293 float f; 294 } imm; 295}; 296 297static const fs_reg reg_undef; 298static const fs_reg reg_null(ARF, BRW_ARF_NULL); 299 300class fs_inst : public exec_node { 301public: 302 /* Callers of this talloc-based new need not call delete. It's 303 * easier to just talloc_free 'ctx' (or any of its ancestors). */ 304 static void* operator new(size_t size, void *ctx) 305 { 306 void *node; 307 308 node = talloc_zero_size(ctx, size); 309 assert(node != NULL); 310 311 return node; 312 } 313 314 void init() 315 { 316 this->opcode = BRW_OPCODE_NOP; 317 this->saturate = false; 318 this->conditional_mod = BRW_CONDITIONAL_NONE; 319 this->predicated = false; 320 this->sampler = 0; 321 this->shadow_compare = false; 322 } 323 324 fs_inst() 325 { 326 init(); 327 } 328 329 fs_inst(int opcode) 330 { 331 init(); 332 this->opcode = opcode; 333 } 334 335 fs_inst(int opcode, fs_reg dst, fs_reg src0) 336 { 337 init(); 338 this->opcode = opcode; 339 this->dst = dst; 340 this->src[0] = src0; 341 } 342 343 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) 344 { 345 init(); 346 this->opcode = opcode; 347 this->dst = dst; 348 this->src[0] = src0; 349 this->src[1] = src1; 350 } 351 352 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) 353 { 354 init(); 355 this->opcode = opcode; 356 this->dst = dst; 357 this->src[0] = src0; 358 this->src[1] = src1; 359 this->src[2] = src2; 360 } 361 362 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 363 fs_reg dst; 364 fs_reg src[3]; 365 bool saturate; 366 bool predicated; 367 int conditional_mod; /**< BRW_CONDITIONAL_* */ 368 369 
int mlen; /** SEND message length */ 370 int sampler; 371 bool shadow_compare; 372 373 /** @{ 374 * Annotation for the generated IR. One of the two can be set. 375 */ 376 ir_instruction *ir; 377 const char *annotation; 378 /** @} */ 379}; 380 381class fs_visitor : public ir_visitor 382{ 383public: 384 385 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) 386 { 387 this->c = c; 388 this->p = &c->func; 389 this->brw = p->brw; 390 this->intel = &brw->intel; 391 this->ctx = &intel->ctx; 392 this->mem_ctx = talloc_new(NULL); 393 this->shader = shader; 394 this->fail = false; 395 this->next_abstract_grf = 1; 396 this->variable_ht = hash_table_ctor(0, 397 hash_table_pointer_hash, 398 hash_table_pointer_compare); 399 400 this->frag_color = NULL; 401 this->frag_data = NULL; 402 this->frag_depth = NULL; 403 this->first_non_payload_grf = 0; 404 405 this->current_annotation = NULL; 406 this->annotation_string = NULL; 407 this->annotation_ir = NULL; 408 this->base_ir = NULL; 409 } 410 ~fs_visitor() 411 { 412 talloc_free(this->mem_ctx); 413 hash_table_dtor(this->variable_ht); 414 } 415 416 fs_reg *variable_storage(ir_variable *var); 417 418 void visit(ir_variable *ir); 419 void visit(ir_assignment *ir); 420 void visit(ir_dereference_variable *ir); 421 void visit(ir_dereference_record *ir); 422 void visit(ir_dereference_array *ir); 423 void visit(ir_expression *ir); 424 void visit(ir_texture *ir); 425 void visit(ir_if *ir); 426 void visit(ir_constant *ir); 427 void visit(ir_swizzle *ir); 428 void visit(ir_return *ir); 429 void visit(ir_loop *ir); 430 void visit(ir_loop_jump *ir); 431 void visit(ir_discard *ir); 432 void visit(ir_call *ir); 433 void visit(ir_function *ir); 434 void visit(ir_function_signature *ir); 435 436 fs_inst *emit(fs_inst inst); 437 void assign_curb_setup(); 438 void assign_urb_setup(); 439 void assign_regs(); 440 void generate_code(); 441 void generate_fb_write(fs_inst *inst); 442 void generate_linterp(fs_inst *inst, struct brw_reg dst, 443 
struct brw_reg *src); 444 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 445 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); 446 void generate_discard(fs_inst *inst); 447 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 448 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 449 450 void emit_dummy_fs(); 451 void emit_interpolation(); 452 void emit_pinterp(int location); 453 void emit_fb_writes(); 454 455 struct brw_reg interp_reg(int location, int channel); 456 457 struct brw_context *brw; 458 struct intel_context *intel; 459 GLcontext *ctx; 460 struct brw_wm_compile *c; 461 struct brw_compile *p; 462 struct brw_shader *shader; 463 void *mem_ctx; 464 exec_list instructions; 465 int next_abstract_grf; 466 struct hash_table *variable_ht; 467 ir_variable *frag_color, *frag_data, *frag_depth; 468 int first_non_payload_grf; 469 470 /** @{ debug annotation info */ 471 const char *current_annotation; 472 ir_instruction *base_ir; 473 const char **annotation_string; 474 ir_instruction **annotation_ir; 475 /** @} */ 476 477 bool fail; 478 479 /* Result of last visit() method. */ 480 fs_reg result; 481 482 fs_reg pixel_x; 483 fs_reg pixel_y; 484 fs_reg pixel_w; 485 fs_reg delta_x; 486 fs_reg delta_y; 487 fs_reg interp_attrs[64]; 488 489 int grf_used; 490 491}; 492 493/** Fixed HW reg constructor. */ 494fs_reg::fs_reg(enum register_file file, int hw_reg) 495{ 496 init(); 497 this->file = file; 498 this->hw_reg = hw_reg; 499 this->type = BRW_REGISTER_TYPE_F; 500} 501 502/** Automatic reg constructor. 
*/ 503fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) 504{ 505 init(); 506 507 this->file = GRF; 508 this->reg = v->next_abstract_grf; 509 this->reg_offset = 0; 510 v->next_abstract_grf += type_size(type); 511 512 switch (type->base_type) { 513 case GLSL_TYPE_FLOAT: 514 this->type = BRW_REGISTER_TYPE_F; 515 break; 516 case GLSL_TYPE_INT: 517 case GLSL_TYPE_BOOL: 518 this->type = BRW_REGISTER_TYPE_D; 519 break; 520 case GLSL_TYPE_UINT: 521 this->type = BRW_REGISTER_TYPE_UD; 522 break; 523 default: 524 assert(!"not reached"); 525 this->type = BRW_REGISTER_TYPE_F; 526 break; 527 } 528} 529 530fs_reg * 531fs_visitor::variable_storage(ir_variable *var) 532{ 533 return (fs_reg *)hash_table_find(this->variable_ht, var); 534} 535 536void 537fs_visitor::visit(ir_variable *ir) 538{ 539 fs_reg *reg = NULL; 540 541 if (strcmp(ir->name, "gl_FragColor") == 0) { 542 this->frag_color = ir; 543 } else if (strcmp(ir->name, "gl_FragData") == 0) { 544 this->frag_data = ir; 545 } else if (strcmp(ir->name, "gl_FragDepth") == 0) { 546 this->frag_depth = ir; 547 assert(!"FINISHME: this hangs currently."); 548 } 549 550 if (ir->mode == ir_var_in) { 551 reg = &this->interp_attrs[ir->location]; 552 } 553 554 if (ir->mode == ir_var_uniform) { 555 const float *vec_values; 556 int param_index = c->prog_data.nr_params; 557 558 /* FINISHME: This is wildly incomplete. */ 559 assert(ir->type->is_scalar() || ir->type->is_vector() || 560 ir->type->is_sampler()); 561 562 const struct gl_program *fp = &this->brw->fragment_program->Base; 563 /* Our support for uniforms is piggy-backed on the struct 564 * gl_fragment_program, because that's where the values actually 565 * get stored, rather than in some global gl_shader_program uniform 566 * store. 
567 */ 568 vec_values = fp->Parameters->ParameterValues[ir->location]; 569 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 570 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; 571 } 572 573 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); 574 } 575 576 if (!reg) 577 reg = new(this->mem_ctx) fs_reg(this, ir->type); 578 579 hash_table_insert(this->variable_ht, reg, ir); 580} 581 582void 583fs_visitor::visit(ir_dereference_variable *ir) 584{ 585 fs_reg *reg = variable_storage(ir->var); 586 this->result = *reg; 587} 588 589void 590fs_visitor::visit(ir_dereference_record *ir) 591{ 592 assert(!"FINISHME"); 593} 594 595void 596fs_visitor::visit(ir_dereference_array *ir) 597{ 598 ir_constant *index; 599 int element_size; 600 601 ir->array->accept(this); 602 index = ir->array_index->as_constant(); 603 604 if (ir->type->is_matrix()) { 605 element_size = ir->type->vector_elements; 606 } else { 607 element_size = type_size(ir->type); 608 } 609 610 if (index) { 611 assert(this->result.file == UNIFORM || 612 (this->result.file == GRF && 613 this->result.reg != 0)); 614 this->result.reg_offset += index->value.i[0] * element_size; 615 } else { 616 assert(!"FINISHME: non-constant matrix column"); 617 } 618} 619 620void 621fs_visitor::visit(ir_expression *ir) 622{ 623 unsigned int operand; 624 fs_reg op[2], temp; 625 fs_reg result; 626 fs_inst *inst; 627 628 for (operand = 0; operand < ir->get_num_operands(); operand++) { 629 ir->operands[operand]->accept(this); 630 if (this->result.file == BAD_FILE) { 631 ir_print_visitor v; 632 printf("Failed to get tree for expression operand:\n"); 633 ir->operands[operand]->accept(&v); 634 this->fail = true; 635 } 636 op[operand] = this->result; 637 638 /* Matrix expression operands should have been broken down to vector 639 * operations already. 640 */ 641 assert(!ir->operands[operand]->type->is_matrix()); 642 /* And then those vector operands should have been broken down to scalar. 
643 */ 644 assert(!ir->operands[operand]->type->is_vector()); 645 } 646 647 /* Storage for our result. If our result goes into an assignment, it will 648 * just get copy-propagated out, so no worries. 649 */ 650 this->result = fs_reg(this, ir->type); 651 652 switch (ir->operation) { 653 case ir_unop_logic_not: 654 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1))); 655 break; 656 case ir_unop_neg: 657 op[0].negate = !op[0].negate; 658 this->result = op[0]; 659 break; 660 case ir_unop_abs: 661 op[0].abs = true; 662 this->result = op[0]; 663 break; 664 case ir_unop_sign: 665 temp = fs_reg(this, ir->type); 666 667 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); 668 669 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 670 inst->conditional_mod = BRW_CONDITIONAL_G; 671 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); 672 inst->predicated = true; 673 674 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f))); 675 inst->conditional_mod = BRW_CONDITIONAL_L; 676 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); 677 inst->predicated = true; 678 679 break; 680 case ir_unop_rcp: 681 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0])); 682 break; 683 684 case ir_unop_exp2: 685 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0])); 686 break; 687 case ir_unop_log2: 688 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0])); 689 break; 690 case ir_unop_exp: 691 case ir_unop_log: 692 assert(!"not reached: should be handled by ir_explog_to_explog2"); 693 break; 694 case ir_unop_sin: 695 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0])); 696 break; 697 case ir_unop_cos: 698 emit(fs_inst(FS_OPCODE_COS, this->result, op[0])); 699 break; 700 701 case ir_unop_dFdx: 702 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); 703 break; 704 case ir_unop_dFdy: 705 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); 706 break; 707 708 case ir_binop_add: 709 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); 
710 break; 711 case ir_binop_sub: 712 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 713 break; 714 715 case ir_binop_mul: 716 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); 717 break; 718 case ir_binop_div: 719 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 720 break; 721 case ir_binop_mod: 722 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 723 break; 724 725 case ir_binop_less: 726 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 727 inst->conditional_mod = BRW_CONDITIONAL_L; 728 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 729 break; 730 case ir_binop_greater: 731 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 732 inst->conditional_mod = BRW_CONDITIONAL_G; 733 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 734 break; 735 case ir_binop_lequal: 736 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 737 inst->conditional_mod = BRW_CONDITIONAL_LE; 738 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 739 break; 740 case ir_binop_gequal: 741 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 742 inst->conditional_mod = BRW_CONDITIONAL_GE; 743 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 744 break; 745 case ir_binop_equal: 746 case ir_binop_all_equal: /* same as nequal for scalars */ 747 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 748 inst->conditional_mod = BRW_CONDITIONAL_Z; 749 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 750 break; 751 case ir_binop_nequal: 752 case ir_binop_any_nequal: /* same as nequal for scalars */ 753 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 754 inst->conditional_mod = BRW_CONDITIONAL_NZ; 755 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); 756 break; 757 758 case ir_binop_logic_xor: 759 emit(fs_inst(BRW_OPCODE_XOR, 
this->result, op[0], op[1])); 760 break; 761 762 case ir_binop_logic_or: 763 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); 764 break; 765 766 case ir_binop_logic_and: 767 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); 768 break; 769 770 case ir_binop_dot: 771 case ir_binop_cross: 772 case ir_unop_any: 773 assert(!"not reached: should be handled by brw_fs_channel_expressions"); 774 break; 775 776 case ir_unop_noise: 777 assert(!"not reached: should be handled by lower_noise"); 778 break; 779 780 case ir_unop_sqrt: 781 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0])); 782 break; 783 784 case ir_unop_rsq: 785 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0])); 786 break; 787 788 case ir_unop_i2f: 789 case ir_unop_b2f: 790 case ir_unop_b2i: 791 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 792 break; 793 case ir_unop_f2i: 794 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); 795 break; 796 case ir_unop_f2b: 797 case ir_unop_i2b: 798 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); 799 inst->conditional_mod = BRW_CONDITIONAL_NZ; 800 801 case ir_unop_trunc: 802 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 803 break; 804 case ir_unop_ceil: 805 op[0].negate = ~op[0].negate; 806 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 807 this->result.negate = true; 808 break; 809 case ir_unop_floor: 810 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); 811 break; 812 case ir_unop_fract: 813 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); 814 break; 815 816 case ir_binop_min: 817 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 818 inst->conditional_mod = BRW_CONDITIONAL_L; 819 820 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); 821 inst->predicated = true; 822 break; 823 case ir_binop_max: 824 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); 825 inst->conditional_mod = BRW_CONDITIONAL_G; 826 827 inst = emit(fs_inst(BRW_OPCODE_SEL, 
this->result, op[0], op[1])); 828 inst->predicated = true; 829 break; 830 831 case ir_binop_pow: 832 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1])); 833 break; 834 835 case ir_unop_bit_not: 836 case ir_unop_u2f: 837 case ir_binop_lshift: 838 case ir_binop_rshift: 839 case ir_binop_bit_and: 840 case ir_binop_bit_xor: 841 case ir_binop_bit_or: 842 assert(!"GLSL 1.30 features unsupported"); 843 break; 844 } 845} 846 847void 848fs_visitor::visit(ir_assignment *ir) 849{ 850 struct fs_reg l, r; 851 int i; 852 int write_mask; 853 fs_inst *inst; 854 855 /* FINISHME: arrays on the lhs */ 856 ir->lhs->accept(this); 857 l = this->result; 858 859 ir->rhs->accept(this); 860 r = this->result; 861 862 /* FINISHME: This should really set to the correct maximal writemask for each 863 * FINISHME: component written (in the loops below). This case can only 864 * FINISHME: occur for matrices, arrays, and structures. 865 */ 866 if (ir->write_mask == 0) { 867 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 868 write_mask = WRITEMASK_XYZW; 869 } else { 870 assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar()); 871 write_mask = ir->write_mask; 872 } 873 874 assert(l.file != BAD_FILE); 875 assert(r.file != BAD_FILE); 876 877 if (ir->condition) { 878 /* Get the condition bool into the predicate. 
*/ 879 ir->condition->accept(this); 880 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, fs_reg(0))); 881 inst->conditional_mod = BRW_CONDITIONAL_NZ; 882 } 883 884 for (i = 0; i < type_size(ir->lhs->type); i++) { 885 if (i >= 4 || (write_mask & (1 << i))) { 886 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); 887 if (ir->condition) 888 inst->predicated = true; 889 r.reg_offset++; 890 } 891 l.reg_offset++; 892 } 893} 894 895void 896fs_visitor::visit(ir_texture *ir) 897{ 898 int base_mrf = 2; 899 fs_inst *inst = NULL; 900 unsigned int mlen = 0; 901 902 ir->coordinate->accept(this); 903 fs_reg coordinate = this->result; 904 905 if (ir->projector) { 906 fs_reg inv_proj = fs_reg(this, glsl_type::float_type); 907 908 ir->projector->accept(this); 909 emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result)); 910 911 fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type); 912 for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) { 913 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj)); 914 coordinate.reg_offset++; 915 proj_coordinate.reg_offset++; 916 } 917 proj_coordinate.reg_offset = 0; 918 919 coordinate = proj_coordinate; 920 } 921 922 for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) { 923 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate)); 924 coordinate.reg_offset++; 925 } 926 927 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ 928 if (intel->gen < 5) 929 mlen = 3; 930 931 if (ir->shadow_comparitor) { 932 /* For shadow comparisons, we have to supply u,v,r. */ 933 mlen = 3; 934 935 ir->shadow_comparitor->accept(this); 936 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 937 mlen++; 938 } 939 940 /* Do we ever want to handle writemasking on texture samples? Is it 941 * performance relevant? 
942 */ 943 fs_reg dst = fs_reg(this, glsl_type::vec4_type); 944 945 switch (ir->op) { 946 case ir_tex: 947 inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf))); 948 break; 949 case ir_txb: 950 ir->lod_info.bias->accept(this); 951 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 952 mlen++; 953 954 inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf))); 955 break; 956 case ir_txl: 957 ir->lod_info.lod->accept(this); 958 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); 959 mlen++; 960 961 inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf))); 962 break; 963 case ir_txd: 964 case ir_txf: 965 assert(!"GLSL 1.30 features unsupported"); 966 break; 967 } 968 969 this->result = dst; 970 971 if (ir->shadow_comparitor) 972 inst->shadow_compare = true; 973 inst->mlen = mlen; 974} 975 976void 977fs_visitor::visit(ir_swizzle *ir) 978{ 979 ir->val->accept(this); 980 fs_reg val = this->result; 981 982 fs_reg result = fs_reg(this, ir->type); 983 this->result = result; 984 985 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 986 fs_reg channel = val; 987 int swiz = 0; 988 989 switch (i) { 990 case 0: 991 swiz = ir->mask.x; 992 break; 993 case 1: 994 swiz = ir->mask.y; 995 break; 996 case 2: 997 swiz = ir->mask.z; 998 break; 999 case 3: 1000 swiz = ir->mask.w; 1001 break; 1002 } 1003 1004 channel.reg_offset += swiz; 1005 emit(fs_inst(BRW_OPCODE_MOV, result, channel)); 1006 result.reg_offset++; 1007 } 1008} 1009 1010void 1011fs_visitor::visit(ir_discard *ir) 1012{ 1013 assert(ir->condition == NULL); /* FINISHME */ 1014 1015 emit(fs_inst(FS_OPCODE_DISCARD)); 1016} 1017 1018void 1019fs_visitor::visit(ir_constant *ir) 1020{ 1021 fs_reg reg(this, ir->type); 1022 this->result = reg; 1023 1024 for (unsigned int i = 0; i < ir->type->vector_elements; i++) { 1025 switch (ir->type->base_type) { 1026 case GLSL_TYPE_FLOAT: 1027 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); 1028 break; 
1029 case GLSL_TYPE_UINT: 1030 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); 1031 break; 1032 case GLSL_TYPE_INT: 1033 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); 1034 break; 1035 case GLSL_TYPE_BOOL: 1036 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); 1037 break; 1038 default: 1039 assert(!"Non-float/uint/int/bool constant"); 1040 } 1041 reg.reg_offset++; 1042 } 1043} 1044 1045void 1046fs_visitor::visit(ir_if *ir) 1047{ 1048 fs_inst *inst; 1049 1050 /* Don't point the annotation at the if statement, because then it plus 1051 * the then and else blocks get printed. 1052 */ 1053 this->base_ir = ir->condition; 1054 1055 /* Generate the condition into the condition code. */ 1056 ir->condition->accept(this); 1057 inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result)); 1058 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1059 1060 inst = emit(fs_inst(BRW_OPCODE_IF)); 1061 inst->predicated = true; 1062 1063 foreach_iter(exec_list_iterator, iter, ir->then_instructions) { 1064 ir_instruction *ir = (ir_instruction *)iter.get(); 1065 this->base_ir = ir; 1066 1067 ir->accept(this); 1068 } 1069 1070 if (!ir->else_instructions.is_empty()) { 1071 emit(fs_inst(BRW_OPCODE_ELSE)); 1072 1073 foreach_iter(exec_list_iterator, iter, ir->else_instructions) { 1074 ir_instruction *ir = (ir_instruction *)iter.get(); 1075 this->base_ir = ir; 1076 1077 ir->accept(this); 1078 } 1079 } 1080 1081 emit(fs_inst(BRW_OPCODE_ENDIF)); 1082} 1083 1084void 1085fs_visitor::visit(ir_loop *ir) 1086{ 1087 assert(!ir->from); 1088 assert(!ir->to); 1089 assert(!ir->increment); 1090 assert(!ir->counter); 1091 1092 emit(fs_inst(BRW_OPCODE_DO)); 1093 1094 /* Start a safety counter. If the user messed up their loop 1095 * counting, we don't want to hang the GPU. 
1096 */ 1097 fs_reg max_iter = fs_reg(this, glsl_type::int_type); 1098 emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000))); 1099 1100 foreach_iter(exec_list_iterator, iter, ir->body_instructions) { 1101 ir_instruction *ir = (ir_instruction *)iter.get(); 1102 fs_inst *inst; 1103 1104 this->base_ir = ir; 1105 ir->accept(this); 1106 1107 /* Check the maximum loop iters counter. */ 1108 inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1))); 1109 inst->conditional_mod = BRW_CONDITIONAL_Z; 1110 1111 inst = emit(fs_inst(BRW_OPCODE_BREAK)); 1112 inst->predicated = true; 1113 } 1114 1115 emit(fs_inst(BRW_OPCODE_WHILE)); 1116} 1117 1118void 1119fs_visitor::visit(ir_loop_jump *ir) 1120{ 1121 switch (ir->mode) { 1122 case ir_loop_jump::jump_break: 1123 emit(fs_inst(BRW_OPCODE_BREAK)); 1124 break; 1125 case ir_loop_jump::jump_continue: 1126 emit(fs_inst(BRW_OPCODE_CONTINUE)); 1127 break; 1128 } 1129} 1130 1131void 1132fs_visitor::visit(ir_call *ir) 1133{ 1134 assert(!"FINISHME"); 1135} 1136 1137void 1138fs_visitor::visit(ir_return *ir) 1139{ 1140 assert(!"FINISHME"); 1141} 1142 1143void 1144fs_visitor::visit(ir_function *ir) 1145{ 1146 /* Ignore function bodies other than main() -- we shouldn't see calls to 1147 * them since they should all be inlined before we get to ir_to_mesa. 
1148 */ 1149 if (strcmp(ir->name, "main") == 0) { 1150 const ir_function_signature *sig; 1151 exec_list empty; 1152 1153 sig = ir->matching_signature(&empty); 1154 1155 assert(sig); 1156 1157 foreach_iter(exec_list_iterator, iter, sig->body) { 1158 ir_instruction *ir = (ir_instruction *)iter.get(); 1159 this->base_ir = ir; 1160 1161 ir->accept(this); 1162 } 1163 } 1164} 1165 1166void 1167fs_visitor::visit(ir_function_signature *ir) 1168{ 1169 assert(!"not reached"); 1170 (void)ir; 1171} 1172 1173fs_inst * 1174fs_visitor::emit(fs_inst inst) 1175{ 1176 fs_inst *list_inst = new(mem_ctx) fs_inst; 1177 *list_inst = inst; 1178 1179 list_inst->annotation = this->current_annotation; 1180 list_inst->ir = this->base_ir; 1181 1182 this->instructions.push_tail(list_inst); 1183 1184 return list_inst; 1185} 1186 1187/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ 1188void 1189fs_visitor::emit_dummy_fs() 1190{ 1191 /* Everyone's favorite color. */ 1192 emit(fs_inst(BRW_OPCODE_MOV, 1193 fs_reg(MRF, 2), 1194 fs_reg(1.0f))); 1195 emit(fs_inst(BRW_OPCODE_MOV, 1196 fs_reg(MRF, 3), 1197 fs_reg(0.0f))); 1198 emit(fs_inst(BRW_OPCODE_MOV, 1199 fs_reg(MRF, 4), 1200 fs_reg(1.0f))); 1201 emit(fs_inst(BRW_OPCODE_MOV, 1202 fs_reg(MRF, 5), 1203 fs_reg(0.0f))); 1204 1205 fs_inst *write; 1206 write = emit(fs_inst(FS_OPCODE_FB_WRITE, 1207 fs_reg(0), 1208 fs_reg(0))); 1209} 1210 1211/* The register location here is relative to the start of the URB 1212 * data. It will get adjusted to be a real location before 1213 * generate_code() time. 1214 */ 1215struct brw_reg 1216fs_visitor::interp_reg(int location, int channel) 1217{ 1218 int regnr = location * 2 + channel / 2; 1219 int stride = (channel & 1) * 4; 1220 1221 return brw_vec1_grf(regnr, stride); 1222} 1223 1224/** Emits the interpolation for the varying inputs. 
 */
void
fs_visitor::emit_interpolation()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   /* For now, the source regs for the setup URB data will be unset,
    * since we don't know until codegen how many push constants we'll
    * use, and therefore what the setup URB offset is.
    *
    * NOTE(review): src_reg is currently unused in this function.
    */
   fs_reg src_reg = reg_undef;

   /* Pixel centers: start from the subspan X/Y in g1 and add the per-pixel
    * offsets packed in the immediate vectors.
    */
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   /* Deltas from the triangle's v0, used as the inputs to LINTERP/PLN.
    *
    * NOTE(review): delta_x subtracts g1.0 (negate) but delta_y adds g1.1
    * without a negate -- asymmetric; confirm whether g1.1 already holds a
    * negated Y start or whether this is a sign bug.
    */
   this->current_annotation = "compute pixel deltas from v0";
   this->delta_x = fs_reg(this, glsl_type::float_type);
   this->delta_y = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(brw_vec1_grf(1, 1))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.  Unlike many other varying inputs, we usually need it
    * to produce 1/w, and the varying variable wouldn't show up.
    */
   fs_reg wpos = fs_reg(this, glsl_type::vec4_type);
   this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos;
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */
   wpos.reg_offset++;
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */
   wpos.reg_offset++;
   /* Z and W are linearly interpolated from the setup data. */
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 2)));
   wpos.reg_offset++;
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));

   /* FINISHME: gl_FrontFacing */

   /* Emit interpolation for every other input varying the shader reads. */
   foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      ir_variable *var = ir->as_variable();

      if (!var)
         continue;

      if (var->mode != ir_var_in)
         continue;

      /* If it's already set up (WPOS), skip.
       */
      if (var->location == 0)
         continue;

      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "interpolate %s "
                                                 "(FRAG_ATTRIB[%d])",
                                                 var->name,
                                                 var->location);
      emit_pinterp(var->location);
   }
   this->current_annotation = NULL;
}

/** Emits perspective-correct interpolation of all four channels of the
 * attribute at \p location: a LINTERP per channel, then a multiply by
 * the previously computed 1/w (this->pixel_w).
 */
void
fs_visitor::emit_pinterp(int location)
{
   fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
   this->interp_attrs[location] = interp_attr;

   for (unsigned int i = 0; i < 4; i++) {
      struct brw_reg interp = interp_reg(location, i);
      emit(fs_inst(FS_OPCODE_LINTERP,
                   interp_attr,
                   this->delta_x,
                   this->delta_y,
                   fs_reg(interp)));
      interp_attr.reg_offset++;
   }
   /* Rewind to channel 0 for the perspective divide pass. */
   interp_attr.reg_offset -= 4;

   for (unsigned int i = 0; i < 4; i++) {
      emit(fs_inst(BRW_OPCODE_MUL,
                   interp_attr,
                   interp_attr,
                   this->pixel_w));
      interp_attr.reg_offset++;
   }
}

/** Moves gl_FragColor into MRF 2-5 and emits the framebuffer write. */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write";

   assert(this->frag_color || !"FINISHME: MRT");
   fs_reg color = *(variable_storage(this->frag_color));

   for (int i = 0; i < 4; i++) {
      emit(fs_inst(BRW_OPCODE_MOV,
                   fs_reg(MRF, 2 + i),
                   color));
      color.reg_offset++;
   }

   emit(fs_inst(FS_OPCODE_FB_WRITE,
                fs_reg(0),
                fs_reg(0)));

   this->current_annotation = NULL;
}

void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = 1; /* FINISHME: MRT */
   /* FINISHME: AADS */

   /* Header is 2 regs, g0 and g1 are the contents.  g0 will be implied
    * move, here's g1.
1362 */ 1363 brw_push_insn_state(p); 1364 brw_set_mask_control(p, BRW_MASK_DISABLE); 1365 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1366 brw_MOV(p, 1367 brw_message_reg(1), 1368 brw_vec8_grf(1, 0)); 1369 brw_pop_insn_state(p); 1370 1371 int nr = 2 + 4; 1372 1373 brw_fb_WRITE(p, 1374 8, /* dispatch_width */ 1375 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 1376 0, /* base MRF */ 1377 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 1378 0, /* FINISHME: MRT target */ 1379 nr, 1380 0, 1381 eot); 1382} 1383 1384void 1385fs_visitor::generate_linterp(fs_inst *inst, 1386 struct brw_reg dst, struct brw_reg *src) 1387{ 1388 struct brw_reg delta_x = src[0]; 1389 struct brw_reg delta_y = src[1]; 1390 struct brw_reg interp = src[2]; 1391 1392 if (brw->has_pln && 1393 delta_y.nr == delta_x.nr + 1 && 1394 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { 1395 brw_PLN(p, dst, interp, delta_x); 1396 } else { 1397 brw_LINE(p, brw_null_reg(), interp, delta_x); 1398 brw_MAC(p, dst, suboffset(interp, 1), delta_y); 1399 } 1400} 1401 1402void 1403fs_visitor::generate_math(fs_inst *inst, 1404 struct brw_reg dst, struct brw_reg *src) 1405{ 1406 int op; 1407 1408 switch (inst->opcode) { 1409 case FS_OPCODE_RCP: 1410 op = BRW_MATH_FUNCTION_INV; 1411 break; 1412 case FS_OPCODE_RSQ: 1413 op = BRW_MATH_FUNCTION_RSQ; 1414 break; 1415 case FS_OPCODE_SQRT: 1416 op = BRW_MATH_FUNCTION_SQRT; 1417 break; 1418 case FS_OPCODE_EXP2: 1419 op = BRW_MATH_FUNCTION_EXP; 1420 break; 1421 case FS_OPCODE_LOG2: 1422 op = BRW_MATH_FUNCTION_LOG; 1423 break; 1424 case FS_OPCODE_POW: 1425 op = BRW_MATH_FUNCTION_POW; 1426 break; 1427 case FS_OPCODE_SIN: 1428 op = BRW_MATH_FUNCTION_SIN; 1429 break; 1430 case FS_OPCODE_COS: 1431 op = BRW_MATH_FUNCTION_COS; 1432 break; 1433 default: 1434 assert(!"not reached: unknown math function"); 1435 op = 0; 1436 break; 1437 } 1438 1439 if (inst->opcode == FS_OPCODE_POW) { 1440 brw_MOV(p, brw_message_reg(3), src[1]); 1441 } 1442 1443 brw_math(p, dst, 1444 op, 1445 
inst->saturate ? BRW_MATH_SATURATE_SATURATE : 1446 BRW_MATH_SATURATE_NONE, 1447 2, src[0], 1448 BRW_MATH_DATA_VECTOR, 1449 BRW_MATH_PRECISION_FULL); 1450} 1451 1452void 1453fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) 1454{ 1455 int msg_type = -1; 1456 int rlen = 4; 1457 1458 if (intel->gen == 5) { 1459 switch (inst->opcode) { 1460 case FS_OPCODE_TEX: 1461 if (inst->shadow_compare) { 1462 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; 1463 } else { 1464 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5; 1465 } 1466 break; 1467 case FS_OPCODE_TXB: 1468 if (inst->shadow_compare) { 1469 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5; 1470 } else { 1471 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; 1472 } 1473 break; 1474 } 1475 } else { 1476 switch (inst->opcode) { 1477 case FS_OPCODE_TEX: 1478 /* Note that G45 and older determines shadow compare and dispatch width 1479 * from message length for most messages. 1480 */ 1481 if (inst->shadow_compare) { 1482 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; 1483 } else { 1484 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; 1485 } 1486 case FS_OPCODE_TXB: 1487 if (inst->shadow_compare) { 1488 assert(!"FINISHME: shadow compare with bias."); 1489 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1490 } else { 1491 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; 1492 rlen = 8; 1493 } 1494 break; 1495 } 1496 } 1497 assert(msg_type != -1); 1498 1499 /* g0 header. 
*/ 1500 src.nr--; 1501 1502 brw_SAMPLE(p, 1503 retype(dst, BRW_REGISTER_TYPE_UW), 1504 src.nr, 1505 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), 1506 SURF_INDEX_TEXTURE(inst->sampler), 1507 inst->sampler, 1508 WRITEMASK_XYZW, 1509 msg_type, 1510 rlen, 1511 inst->mlen + 1, 1512 0, 1513 1, 1514 BRW_SAMPLER_SIMD_MODE_SIMD8); 1515} 1516 1517 1518/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input 1519 * looking like: 1520 * 1521 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br 1522 * 1523 * and we're trying to produce: 1524 * 1525 * DDX DDY 1526 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) 1527 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) 1528 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) 1529 * (ss0.br - ss0.bl) (ss0.tr - ss0.br) 1530 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) 1531 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) 1532 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) 1533 * (ss1.br - ss1.bl) (ss1.tr - ss1.br) 1534 * 1535 * and add another set of two more subspans if in 16-pixel dispatch mode. 1536 * 1537 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result 1538 * for each pair, and vertstride = 2 jumps us 2 elements after processing a 1539 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled 1540 * between each other. 
We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Right column (suboffset 1) minus left column (suboffset 0), with
    * width 2 / horiz stride 0 replicating each difference across the
    * pixel pair, per the table above.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Top row (suboffset 0) minus bottom row (suboffset 2), replicated x4
    * per subspan -- the simplified result described in the comment above.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

/* ANDs the inverted execution mask (IMASK) into g0 so discarded pixels
 * stay masked off for the rest of the shader.
 *
 * NOTE(review): assumes g0.0 holds the pixel mask consumed by later
 * messages (e.g. the FB write header) -- confirm against the PRM.
 */
void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, c->emit_mask_reg, g0);
   brw_pop_insn_state(p);
}

/* Maps a virtual GRF (reg numbers start at 1; 0 means "not a virtual
 * reg") to the hardware register right after the payload/header regs.
 */
static void
trivial_assign_reg(int header_size, fs_reg *reg)
{
   if (reg->file == GRF && reg->reg != 0) {
      reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
      reg->reg = 0;
   }
}

/** Lays out the push-constant (CURBE) registers and rewrites UNIFORM
 * sources to the fixed hardware registers they land in.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   /* 8 float params fit per register. */
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
                           c->prog_data.curb_read_length) & 1) {
      /* Align the start of the interpolation coefficients so that we can use
       * the PLN instruction.
       */
      c->prog_data.first_curbe_grf++;
   }

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
            int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
            struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
                                                  constant_nr / 8,
                                                  constant_nr % 8);

            inst->src[i].file = FIXED_HW_REG;
            inst->src[i].fixed_hw_reg = brw_reg;
         }
      }
   }
}

/** Lays out the per-attribute setup (URB) registers after the CURBE and
 * patches LINTERP's third source to point at the right one.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      if (i != FRAG_ATTRIB_WPOS &&
          !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
         continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
         continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* src[2] still holds the URB-relative location from interp_reg();
       * translate it to the real register assigned above, keeping the
       * subregister selection in the low bit.
       */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
                                      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}

/** Assigns hardware registers to every virtual GRF (one HW reg each --
 * no liveness analysis or spilling yet).
 *
 * src[2] is not assigned here: the only opcode using it is LINTERP,
 * whose src[2] is already FIXED_HW_REG (asserted in assign_urb_setup).
 */
void
fs_visitor::assign_regs()
{
   int header_size = this->first_non_payload_grf;
   int last_grf = 0;

   /* FINISHME: trivial assignment of register numbers */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      trivial_assign_reg(header_size, &inst->dst);
      trivial_assign_reg(header_size, &inst->src[0]);
      trivial_assign_reg(header_size, &inst->src[1]);

      last_grf = MAX2(last_grf, inst->dst.hw_reg);
      last_grf = MAX2(last_grf, inst->src[0].hw_reg);
      last_grf = MAX2(last_grf, inst->src[1].hw_reg);
   }

   this->grf_used = last_grf + 1;
}

/* Converts an fs_reg into the brw_reg form consumed by the brw_eu
 * assembler, applying abs/negate modifiers on the way.
 */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      brw_reg = brw_vec8_reg(reg->file,
                             reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(reg->imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(reg->imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(reg->imm.u);
         break;
      default:
         assert(!"not reached");
         break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused.
*/ 1728 brw_reg = brw_null_reg(); 1729 break; 1730 case UNIFORM: 1731 assert(!"not reached"); 1732 brw_reg = brw_null_reg(); 1733 break; 1734 } 1735 if (reg->abs) 1736 brw_reg = brw_abs(brw_reg); 1737 if (reg->negate) 1738 brw_reg = negate(brw_reg); 1739 1740 return brw_reg; 1741} 1742 1743void 1744fs_visitor::generate_code() 1745{ 1746 unsigned int annotation_len = 0; 1747 int last_native_inst = 0; 1748 struct brw_instruction *if_stack[16], *loop_stack[16]; 1749 int if_stack_depth = 0, loop_stack_depth = 0; 1750 int if_depth_in_loop[16]; 1751 1752 if_depth_in_loop[loop_stack_depth] = 0; 1753 1754 memset(&if_stack, 0, sizeof(if_stack)); 1755 foreach_iter(exec_list_iterator, iter, this->instructions) { 1756 fs_inst *inst = (fs_inst *)iter.get(); 1757 struct brw_reg src[3], dst; 1758 1759 for (unsigned int i = 0; i < 3; i++) { 1760 src[i] = brw_reg_from_fs_reg(&inst->src[i]); 1761 } 1762 dst = brw_reg_from_fs_reg(&inst->dst); 1763 1764 brw_set_conditionalmod(p, inst->conditional_mod); 1765 brw_set_predicate_control(p, inst->predicated); 1766 1767 switch (inst->opcode) { 1768 case BRW_OPCODE_MOV: 1769 brw_MOV(p, dst, src[0]); 1770 break; 1771 case BRW_OPCODE_ADD: 1772 brw_ADD(p, dst, src[0], src[1]); 1773 break; 1774 case BRW_OPCODE_MUL: 1775 brw_MUL(p, dst, src[0], src[1]); 1776 break; 1777 1778 case BRW_OPCODE_FRC: 1779 brw_FRC(p, dst, src[0]); 1780 break; 1781 case BRW_OPCODE_RNDD: 1782 brw_RNDD(p, dst, src[0]); 1783 break; 1784 case BRW_OPCODE_RNDZ: 1785 brw_RNDZ(p, dst, src[0]); 1786 break; 1787 1788 case BRW_OPCODE_AND: 1789 brw_AND(p, dst, src[0], src[1]); 1790 break; 1791 case BRW_OPCODE_OR: 1792 brw_OR(p, dst, src[0], src[1]); 1793 break; 1794 case BRW_OPCODE_XOR: 1795 brw_XOR(p, dst, src[0], src[1]); 1796 break; 1797 1798 case BRW_OPCODE_CMP: 1799 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 1800 break; 1801 case BRW_OPCODE_SEL: 1802 brw_SEL(p, dst, src[0], src[1]); 1803 break; 1804 1805 case BRW_OPCODE_IF: 1806 assert(if_stack_depth < 16); 1807 
if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8); 1808 if_depth_in_loop[loop_stack_depth]++; 1809 if_stack_depth++; 1810 break; 1811 case BRW_OPCODE_ELSE: 1812 if_stack[if_stack_depth - 1] = 1813 brw_ELSE(p, if_stack[if_stack_depth - 1]); 1814 break; 1815 case BRW_OPCODE_ENDIF: 1816 if_stack_depth--; 1817 brw_ENDIF(p , if_stack[if_stack_depth]); 1818 if_depth_in_loop[loop_stack_depth]--; 1819 break; 1820 1821 case BRW_OPCODE_DO: 1822 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 1823 if_depth_in_loop[loop_stack_depth] = 0; 1824 break; 1825 1826 case BRW_OPCODE_BREAK: 1827 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 1828 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1829 break; 1830 case BRW_OPCODE_CONTINUE: 1831 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 1832 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1833 break; 1834 1835 case BRW_OPCODE_WHILE: { 1836 struct brw_instruction *inst0, *inst1; 1837 GLuint br = 1; 1838 1839 if (intel->gen == 5) 1840 br = 2; 1841 1842 assert(loop_stack_depth > 0); 1843 loop_stack_depth--; 1844 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 1845 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 1846 while (inst0 > loop_stack[loop_stack_depth]) { 1847 inst0--; 1848 if (inst0->header.opcode == BRW_OPCODE_BREAK && 1849 inst0->bits3.if_else.jump_count == 0) { 1850 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 1851 } 1852 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 1853 inst0->bits3.if_else.jump_count == 0) { 1854 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 1855 } 1856 } 1857 } 1858 break; 1859 1860 case FS_OPCODE_RCP: 1861 case FS_OPCODE_RSQ: 1862 case FS_OPCODE_SQRT: 1863 case FS_OPCODE_EXP2: 1864 case FS_OPCODE_LOG2: 1865 case FS_OPCODE_POW: 1866 case FS_OPCODE_SIN: 1867 case FS_OPCODE_COS: 1868 generate_math(inst, dst, src); 1869 break; 1870 case FS_OPCODE_LINTERP: 1871 generate_linterp(inst, dst, src); 1872 break; 1873 case 
FS_OPCODE_TEX: 1874 case FS_OPCODE_TXB: 1875 case FS_OPCODE_TXL: 1876 generate_tex(inst, dst, src[0]); 1877 break; 1878 case FS_OPCODE_DISCARD: 1879 generate_discard(inst); 1880 break; 1881 case FS_OPCODE_DDX: 1882 generate_ddx(inst, dst, src[0]); 1883 break; 1884 case FS_OPCODE_DDY: 1885 generate_ddy(inst, dst, src[0]); 1886 break; 1887 case FS_OPCODE_FB_WRITE: 1888 generate_fb_write(inst); 1889 break; 1890 default: 1891 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 1892 _mesa_problem(ctx, "Unsupported opcode `%s' in FS", 1893 brw_opcodes[inst->opcode].name); 1894 } else { 1895 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); 1896 } 1897 this->fail = true; 1898 } 1899 1900 if (annotation_len < p->nr_insn) { 1901 annotation_len *= 2; 1902 if (annotation_len < 16) 1903 annotation_len = 16; 1904 1905 this->annotation_string = talloc_realloc(this->mem_ctx, 1906 annotation_string, 1907 const char *, 1908 annotation_len); 1909 this->annotation_ir = talloc_realloc(this->mem_ctx, 1910 annotation_ir, 1911 ir_instruction *, 1912 annotation_len); 1913 } 1914 1915 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 1916 this->annotation_string[i] = inst->annotation; 1917 this->annotation_ir[i] = inst->ir; 1918 } 1919 last_native_inst = p->nr_insn; 1920 } 1921} 1922 1923GLboolean 1924brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) 1925{ 1926 struct brw_compile *p = &c->func; 1927 struct intel_context *intel = &brw->intel; 1928 GLcontext *ctx = &intel->ctx; 1929 struct brw_shader *shader = NULL; 1930 struct gl_shader_program *prog = ctx->Shader.CurrentProgram; 1931 1932 if (!prog) 1933 return GL_FALSE; 1934 1935 if (!using_new_fs) 1936 return GL_FALSE; 1937 1938 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { 1939 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { 1940 shader = (struct brw_shader *)prog->_LinkedShaders[i]; 1941 break; 1942 } 1943 } 1944 if (!shader) 1945 return GL_FALSE; 1946 1947 /* We 
always use 8-wide mode, at least for now.  For one, flow
    * control only works in 8-wide.  Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      /* Bringup path: solid magenta instead of the real shader. */
      v.emit_dummy_fs();
   } else {
      v.emit_interpolation();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         v.base_ir = ir;
         ir->accept(&v);
      }

      /* Lower the IR to hardware registers: FB writes, then push
       * constants, URB setup inputs, and finally GRF assignment.
       */
      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();
      v.assign_regs();
   }

   v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc.
                     */

   if (v.fail)
      return GL_FALSE;

   if (INTEL_DEBUG & DEBUG_WM) {
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      /* Dump the disassembly, printing each annotation/IR node once when
       * it changes rather than per native instruction.
       */
      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
         if (last_annotation_ir != v.annotation_ir[i]) {
            last_annotation_ir = v.annotation_ir[i];
            if (last_annotation_ir) {
               printf("   ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != v.annotation_string[i]) {
            last_annotation_string = v.annotation_string[i];
            if (last_annotation_string)
               printf("   %s\n", last_annotation_string);
         }
         brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}