brw_vec4_visitor.cpp revision 8e947c2546c25c0dfa93b538e54113af1bf582df
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 */

#include "brw_vec4.h"
extern "C" {
#include "main/macros.h"
#include "program/prog_parameter.h"
}

namespace brw {

/* Construct a src_reg that reads back the storage named by a dst_reg.
 *
 * The destination's writemask is turned into a read swizzle: each
 * enabled channel is taken in order, and the last enabled channel is
 * replicated into the remaining slots so all four swizzle entries are
 * defined.
 */
src_reg::src_reg(dst_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;

   int swizzles[4];
   int next_chan = 0;
   int last = 0;

   /* Collect the channels enabled in the writemask, remembering the
    * last one seen.
    */
   for (int i = 0; i < 4; i++) {
      if (!(reg.writemask & (1 << i)))
         continue;

      swizzles[next_chan++] = last = i;
   }

   /* Replicate the last enabled channel into any unused slots. */
   for (; next_chan < 4; next_chan++) {
      swizzles[next_chan] = last;
   }

   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                                swizzles[2], swizzles[3]);
}

/* Construct a dst_reg writing the storage named by a src_reg.
 *
 * The source's read swizzle is discarded; the destination writes all
 * four channels.
 */
dst_reg::dst_reg(src_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->writemask = WRITEMASK_XYZW;
}

/* Allocate a vec4_instruction out of the visitor's ralloc context,
 * tag it with the current IR node and annotation for debug output,
 * and append it to the instruction stream.
 */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst,
                   src_reg src0, src_reg src1, src_reg src2)
{
   vec4_instruction *inst = new(mem_ctx) vec4_instruction();

   inst->opcode = opcode;
   inst->dst = dst;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->ir = this->base_ir;
   inst->annotation = this->current_annotation;

   this->instructions.push_tail(inst);

   return inst;
}


/* Two-source convenience wrapper: src2 is left unset (BAD_FILE). */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(opcode, dst, src0, src1, src_reg());
}

/* One-source convenience wrapper.  A zero writemask would make the
 * instruction a no-op, so it is almost certainly a caller bug.
 */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
{
   assert(dst.writemask != 0);
   return emit(opcode, dst, src0, src_reg(), src_reg());
}

/* No-operand convenience wrapper (flow control opcodes and the like). */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
}

/* Emit a dot product of the given width (2-4 components). */
void
vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
{
   /* Indexed by (elements - 2). */
   static enum opcode dot_opcodes[] = {
      BRW_OPCODE_DP2,
BRW_OPCODE_DP3, BRW_OPCODE_DP4 115 }; 116 117 emit(dot_opcodes[elements - 2], dst, src0, src1); 118} 119 120void 121vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) 122{ 123 /* The gen6 math instruction ignores the source modifiers -- 124 * swizzle, abs, negate, and at least some parts of the register 125 * region description. Move the source to the corresponding slots 126 * of the destination generally work. 127 */ 128 src_reg expanded = src_reg(this, glsl_type::float_type); 129 emit(BRW_OPCODE_MOV, dst, src); 130 src = expanded; 131 132 emit(opcode, dst, src); 133} 134 135void 136vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) 137{ 138 vec4_instruction *inst = emit(opcode, dst, src); 139 inst->base_mrf = 1; 140 inst->mlen = 1; 141} 142 143void 144vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) 145{ 146 switch (opcode) { 147 case SHADER_OPCODE_RCP: 148 case SHADER_OPCODE_RSQ: 149 case SHADER_OPCODE_SQRT: 150 case SHADER_OPCODE_EXP2: 151 case SHADER_OPCODE_LOG2: 152 case SHADER_OPCODE_SIN: 153 case SHADER_OPCODE_COS: 154 break; 155 default: 156 assert(!"not reached: bad math opcode"); 157 return; 158 } 159 160 if (intel->gen >= 6) { 161 return emit_math1_gen6(opcode, dst, src); 162 } else { 163 return emit_math1_gen4(opcode, dst, src); 164 } 165} 166 167void 168vec4_visitor::emit_math2_gen6(enum opcode opcode, 169 dst_reg dst, src_reg src0, src_reg src1) 170{ 171 src_reg expanded; 172 173 /* The gen6 math instruction ignores the source modifiers -- 174 * swizzle, abs, negate, and at least some parts of the register 175 * region description. Move the sources to temporaries to make it 176 * generally work. 
177 */ 178 179 expanded = src_reg(this, glsl_type::vec4_type); 180 emit(BRW_OPCODE_MOV, dst, src0); 181 src0 = expanded; 182 183 expanded = src_reg(this, glsl_type::vec4_type); 184 emit(BRW_OPCODE_MOV, dst, src1); 185 src1 = expanded; 186 187 emit(opcode, dst, src0, src1); 188} 189 190void 191vec4_visitor::emit_math2_gen4(enum opcode opcode, 192 dst_reg dst, src_reg src0, src_reg src1) 193{ 194 vec4_instruction *inst = emit(opcode, dst, src0, src1); 195 inst->base_mrf = 1; 196 inst->mlen = 2; 197} 198 199void 200vec4_visitor::emit_math(enum opcode opcode, 201 dst_reg dst, src_reg src0, src_reg src1) 202{ 203 assert(opcode == SHADER_OPCODE_POW); 204 205 if (intel->gen >= 6) { 206 return emit_math2_gen6(opcode, dst, src0, src1); 207 } else { 208 return emit_math2_gen4(opcode, dst, src0, src1); 209 } 210} 211 212void 213vec4_visitor::visit_instructions(const exec_list *list) 214{ 215 foreach_iter(exec_list_iterator, iter, *list) { 216 ir_instruction *ir = (ir_instruction *)iter.get(); 217 218 base_ir = ir; 219 ir->accept(this); 220 } 221} 222 223 224static int 225type_size(const struct glsl_type *type) 226{ 227 unsigned int i; 228 int size; 229 230 switch (type->base_type) { 231 case GLSL_TYPE_UINT: 232 case GLSL_TYPE_INT: 233 case GLSL_TYPE_FLOAT: 234 case GLSL_TYPE_BOOL: 235 if (type->is_matrix()) { 236 return type->matrix_columns; 237 } else { 238 /* Regardless of size of vector, it gets a vec4. This is bad 239 * packing for things like floats, but otherwise arrays become a 240 * mess. Hopefully a later pass over the code can pack scalars 241 * down if appropriate. 
          */
         return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
       */
      return 1;
   default:
      assert(0);
      return 0;
   }
}

/* Allocate a fresh virtual GRF of the given size (in vec4 slots) and
 * return its index.  The per-GRF size array grows geometrically.
 */
int
vec4_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_count) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
                                   virtual_grf_array_size);
   }
   virtual_grf_sizes[virtual_grf_count] = size;
   return virtual_grf_count++;
}

/* Construct a src_reg backed by a newly allocated virtual GRF sized
 * for \p type.  Scalars/vectors get a size-appropriate read swizzle;
 * arrays and records are read unswizzled.
 */
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->swizzle = BRW_SWIZZLE_NOOP;
   } else {
      this->swizzle = swizzle_for_size(type->vector_elements);
   }

   this->type = brw_type_for_base_type(type);
}

/* Construct a dst_reg backed by a newly allocated virtual GRF sized
 * for \p type, with the writemask covering the type's components.
 */
dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->writemask = WRITEMASK_XYZW;
   } else {
      this->writemask = (1 << type->vector_elements) - 1;
   }

   this->type = brw_type_for_base_type(type);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
/* Recursively set up prog_data param pointers/conversions for the
 * uniform of the given type at parameter location \p loc.  Returns the
 * number of vec4 slots consumed.
 */
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   /* Matrices are handled column by column, recursing with a column
    * vector type.
    */
   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* Point each used channel at its parameter value, recording the
       * conversion needed from the float-typed parameter storage.
       */
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         int slot = this->uniforms * 4 + i;
         switch (type->base_type) {
         case GLSL_TYPE_FLOAT:
            c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
            break;
         case GLSL_TYPE_UINT:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
            break;
         case GLSL_TYPE_INT:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
            break;
         case GLSL_TYPE_BOOL:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
            break;
         default:
            assert(!"not reached");
            c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
            break;
         }
         c->prog_data.param[slot] = &values[i];
      }

      /* Pad the remaining channels of the vec4 slot with zero. */
      for (unsigned int i = type->vector_elements; i < 4; i++) {
         c->prog_data.param_convert[this->uniforms * 4 + i] =
            PARAM_CONVERT_ZERO;
         c->prog_data.param[this->uniforms * 4 + i] = NULL;
      }

      this->uniform_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      /* Recurse over the fields, accumulating slots. */
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      /* Recurse over the elements, accumulating slots. */
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}

/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
                                            (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);
         /* A repeated channel marks the end of the unique channels. */
         if (swiz == last_swiz)
            break;
         last_swiz = swiz;

         c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
         c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
         this->uniform_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}

/* Look up the register storage previously assigned to an ir_variable,
 * or NULL if it hasn't been visited yet.
 */
dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
   return (dst_reg *)hash_table_find(this->variable_ht, var);
}

/* Evaluate a boolean rvalue for its side effect on the flag register:
 * the condition code is set rather than a register value produced.
 * Where possible the comparison is folded into the conditional mod of
 * the generating instruction.
 */
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* Test bit 0 and set Z: "true" when the bool is 0. */
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         /* Fold the comparison directly into the CMP's conditional mod. */
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      default:
         assert(!"not reached");
         break;
      }
      return;
   }

   /* Not a recognized expression: evaluate it into a register and test
    * that against zero instead.
    */
   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
                                    this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}

/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;
      dst_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* "if (!x)": compare against 0 and branch on equal. */
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         return;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_or:
         /* OR can't be folded into IF directly: compute the value into
          * a temporary and test that against zero.
          */
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_OR, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_and:
         /* Same as OR above: compute into a temporary first. */
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_AND, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_f2b:
         inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_i2b:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         /* Fold the comparison into the IF's conditional mod. */
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         return;
      default:
         assert(!"not reached");
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;
      }
      return;
   }

   /* Not a foldable expression: evaluate the condition to a register
    * and emit IF testing it against zero.
    */
   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
                                 this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}

/* Allocate (or otherwise set up) storage for a variable declaration
 * and record it in variable_ht for later dereferences.
 */
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      /* Record each vec4 slot of the output so the URB writes can find
       * it by location.
       */
      for (int i = 0; i < type_size(ir->type); i++) {
         output_reg[ir->location + i] = *reg;
         output_reg[ir->location + i].reg_offset = i;
      }
      break;

   case ir_var_auto:
   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      /* "gl_" names are state-driven built-in uniforms. */
      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }
      break;

   default:
      assert(!"not reached");
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}

/* Emit a loop: counter initialization, DO, the bound check as a
 * conditional break, the body, the increment, and WHILE.
 * NOTE(review): currently calls fail() unconditionally -- loops are
 * not yet supported by this backend.
 */
void
vec4_visitor::visit(ir_loop *ir)
{
   ir_dereference_variable *counter = NULL;

   fail("not yet\n");

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL)
      counter = new(ir) ir_dereference_variable(ir->counter);

   /* Initialize the counter from the "from" expression, if present. */
   if (ir->from != NULL) {
      assert(ir->counter != NULL);

      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);

      a->accept(this);
      delete a;
   }

   emit(BRW_OPCODE_DO);

   /* Emit the bound test as "if (counter cmp to) break;" at the top of
    * the loop.
    */
   if (ir->to) {
      ir_expression *e =
         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
                               counter, ir->to);
      ir_if *if_stmt = new(ir) ir_if(e);

      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);

      if_stmt->then_instructions.push_tail(brk);

      if_stmt->accept(this);

      delete if_stmt;
      delete e;
      delete brk;
   }

   visit_instructions(&ir->body_instructions);

   /* Step the counter at the bottom of the loop. */
   if (ir->increment) {
      ir_expression *e =
         new(ir) ir_expression(ir_binop_add, counter->type,
                               counter, ir->increment);

      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);

      a->accept(this);
      delete a;
      delete e;
   }

   emit(BRW_OPCODE_WHILE);
}

/* Translate break/continue into the corresponding flow-control opcode. */
void
vec4_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}


/* Function calls should have been inlined away before codegen. */
void
vec4_visitor::visit(ir_function_signature *ir)
{
   assert(0);
   (void)ir;
}

void
vec4_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      visit_instructions(&sig->body);
   }
}

/* If this expression is just a saturate of some value, emit it as a
 * saturated MOV into a fresh temporary and return true; otherwise
 * return false and let the normal expression path handle it.
 */
GLboolean
vec4_visitor::try_emit_sat(ir_expression *ir)
{
   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   src_reg src = this->result;

   this->result = src_reg(this, ir->type);
   vec4_instruction *inst;
   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
   inst->saturate = true;

   return true;
}

/* Emit a comparison producing a 0/1 boolean value in dst. */
void
vec4_visitor::emit_bool_comparison(unsigned int op,
                                 dst_reg dst, src_reg src0, src_reg src1)
{
   /* original gen4 does destination conversion before comparison. */
   if (intel->gen < 5)
      dst.type = src0.type;

   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
   inst->conditional_mod = brw_conditional_for_comparison(op);

   /* Mask the all-ones comparison result down to bit 0. */
   dst.type = BRW_REGISTER_TYPE_D;
   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
}

void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   if (try_emit_sat(ir))
      return;

   /* Evaluate the operands first; each visit leaves its value in
    * this->result.
    */
   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
846 */ 847 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); 848 break; 849 case ir_unop_neg: 850 op[0].negate = !op[0].negate; 851 this->result = op[0]; 852 break; 853 case ir_unop_abs: 854 op[0].abs = true; 855 op[0].negate = false; 856 this->result = op[0]; 857 break; 858 859 case ir_unop_sign: 860 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); 861 862 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 863 inst->conditional_mod = BRW_CONDITIONAL_G; 864 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); 865 inst->predicate = BRW_PREDICATE_NORMAL; 866 867 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 868 inst->conditional_mod = BRW_CONDITIONAL_L; 869 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); 870 inst->predicate = BRW_PREDICATE_NORMAL; 871 872 break; 873 874 case ir_unop_rcp: 875 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); 876 break; 877 878 case ir_unop_exp2: 879 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); 880 break; 881 case ir_unop_log2: 882 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); 883 break; 884 case ir_unop_exp: 885 case ir_unop_log: 886 assert(!"not reached: should be handled by ir_explog_to_explog2"); 887 break; 888 case ir_unop_sin: 889 case ir_unop_sin_reduced: 890 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); 891 break; 892 case ir_unop_cos: 893 case ir_unop_cos_reduced: 894 emit_math(SHADER_OPCODE_COS, result_dst, op[0]); 895 break; 896 897 case ir_unop_dFdx: 898 case ir_unop_dFdy: 899 assert(!"derivatives not valid in vertex shader"); 900 break; 901 902 case ir_unop_noise: 903 assert(!"not reached: should be handled by lower_noise"); 904 break; 905 906 case ir_binop_add: 907 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); 908 break; 909 case ir_binop_sub: 910 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 911 break; 912 913 case ir_binop_mul: 914 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); 915 break; 916 case ir_binop_div: 917 assert(!"not reached: 
should be handled by ir_div_to_mul_rcp"); 918 case ir_binop_mod: 919 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 920 break; 921 922 case ir_binop_less: 923 case ir_binop_greater: 924 case ir_binop_lequal: 925 case ir_binop_gequal: 926 case ir_binop_equal: 927 case ir_binop_nequal: { 928 dst_reg temp = result_dst; 929 /* original gen4 does implicit conversion before comparison. */ 930 if (intel->gen < 5) 931 temp.type = op[0].type; 932 933 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 934 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 935 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); 936 break; 937 } 938 939 case ir_binop_all_equal: 940 /* "==" operator producing a scalar boolean. */ 941 if (ir->operands[0]->type->is_vector() || 942 ir->operands[1]->type->is_vector()) { 943 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 944 inst->conditional_mod = BRW_CONDITIONAL_Z; 945 946 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 947 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 948 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 949 } else { 950 dst_reg temp = result_dst; 951 /* original gen4 does implicit conversion before comparison. */ 952 if (intel->gen < 5) 953 temp.type = op[0].type; 954 955 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 956 inst->conditional_mod = BRW_CONDITIONAL_NZ; 957 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 958 } 959 break; 960 case ir_binop_any_nequal: 961 /* "!=" operator producing a scalar boolean. 
       */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         /* Result is 1 if any of the four channels differed. */
         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      } else {
         dst_reg temp = result_dst;
         /* original gen4 does implicit conversion before comparison. */
         if (intel->gen < 5)
            temp.type = op[0].type;

         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;

   case ir_unop_any:
      /* any(bvec): nonzero in any channel -> predicated move of 1,
       * otherwise the initial 0 stands.
       */
      emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));

      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      break;

   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
      break;
   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      /* Type conversion is handled by the typed MOV itself. */
      emit(BRW_OPCODE_MOV, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b: {
      dst_reg temp = result_dst;
      /* original gen4
         does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
      break;
   }

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      /* ceil(x) = -floor(-x), via the round-down instruction with a
       * negate on the source and on the result.
       */
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
      break;

   case ir_binop_min:
      /* min/max: CMP to set the flag, then predicated SEL. */
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}


/* Handle swizzles appearing in rvalue position: compose the IR swizzle
 * with whatever swizzle the source value already carries.
 */
void
vec4_visitor::visit(ir_swizzle *ir)
{
   src_reg src;
   int i = 0;
   int swizzle[4];

   /* Note that this is only swizzles in expressions, not those on the left
    * hand side of an assignment, which do write masking.  See ir_assignment
    * for that.
    */

   ir->val->accept(this);
   src = this->result;
   assert(src.file != BAD_FILE);

   /* Look each requested component up through the source's existing
    * swizzle.
    */
   for (i = 0; i < ir->type->vector_elements; i++) {
      switch (i) {
      case 0:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
         break;
      case 1:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
         break;
      case 2:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
         break;
      case 3:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
         break;
      }
   }
   for (; i < 4; i++) {
      /* Replicate the last channel out. */
      swizzle[i] = swizzle[ir->type->vector_elements - 1];
   }

   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}

/* Read a variable: return its assigned storage as a src_reg, swizzled
 * down to the type's component count for scalars/vectors/matrices.
 */
void
vec4_visitor::visit(ir_dereference_variable *ir)
{
   const struct glsl_type *type = ir->type;
   dst_reg *reg = variable_storage(ir->var);

   if (!reg) {
      fail("Failed to find variable storage for %s\n", ir->var->name);
      this->result = src_reg(brw_null_reg());
      return;
   }

   this->result = src_reg(*reg);

   if (type->is_scalar() || type->is_vector() || type->is_matrix())
      this->result.swizzle = swizzle_for_size(type->vector_elements);
}

/* Array dereference: offset the base register by index * element size.
 * Only constant indices are handled so far (the variable-index path is
 * compiled out below).
 */
void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if
(constant_index) { 1171 src.reg_offset += constant_index->value.i[0] * element_size; 1172 } else { 1173#if 0 /* Variable array index */ 1174 /* Variable index array dereference. It eats the "vec4" of the 1175 * base of the array and an index that offsets the Mesa register 1176 * index. 1177 */ 1178 ir->array_index->accept(this); 1179 1180 src_reg index_reg; 1181 1182 if (element_size == 1) { 1183 index_reg = this->result; 1184 } else { 1185 index_reg = src_reg(this, glsl_type::float_type); 1186 1187 emit(BRW_OPCODE_MUL, dst_reg(index_reg), 1188 this->result, src_reg_for_float(element_size)); 1189 } 1190 1191 src.reladdr = ralloc(mem_ctx, src_reg); 1192 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1193#endif 1194 } 1195 1196 /* If the type is smaller than a vec4, replicate the last channel out. */ 1197 if (ir->type->is_scalar() || ir->type->is_vector()) 1198 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1199 else 1200 src.swizzle = BRW_SWIZZLE_NOOP; 1201 src.type = brw_type_for_base_type(ir->type); 1202 1203 this->result = src; 1204} 1205 1206void 1207vec4_visitor::visit(ir_dereference_record *ir) 1208{ 1209 unsigned int i; 1210 const glsl_type *struct_type = ir->record->type; 1211 int offset = 0; 1212 1213 ir->record->accept(this); 1214 1215 for (i = 0; i < struct_type->length; i++) { 1216 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 1217 break; 1218 offset += type_size(struct_type->fields.structure[i].type); 1219 } 1220 1221 /* If the type is smaller than a vec4, replicate the last channel out. 
*/ 1222 if (ir->type->is_scalar() || ir->type->is_vector()) 1223 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1224 else 1225 this->result.swizzle = BRW_SWIZZLE_NOOP; 1226 this->result.type = brw_type_for_base_type(ir->type); 1227 1228 this->result.reg_offset += offset; 1229} 1230 1231/** 1232 * We want to be careful in assignment setup to hit the actual storage 1233 * instead of potentially using a temporary like we might with the 1234 * ir_dereference handler. 1235 */ 1236static dst_reg 1237get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) 1238{ 1239 /* The LHS must be a dereference. If the LHS is a variable indexed array 1240 * access of a vector, it must be separated into a series conditional moves 1241 * before reaching this point (see ir_vec_index_to_cond_assign). 1242 */ 1243 assert(ir->as_dereference()); 1244 ir_dereference_array *deref_array = ir->as_dereference_array(); 1245 if (deref_array) { 1246 assert(!deref_array->array->type->is_vector()); 1247 } 1248 1249 /* Use the rvalue deref handler for the most part. We'll ignore 1250 * swizzles in it and write swizzles using writemask, though. 1251 */ 1252 ir->accept(v); 1253 return dst_reg(v->result); 1254} 1255 1256void 1257vec4_visitor::emit_block_move(ir_assignment *ir) 1258{ 1259 ir->rhs->accept(this); 1260 src_reg src = this->result; 1261 1262 dst_reg dst = get_assignment_lhs(ir->lhs, this); 1263 1264 /* FINISHME: This should really set to the correct maximal writemask for each 1265 * FINISHME: component written (in the loops below). 
1266 */ 1267 dst.writemask = WRITEMASK_XYZW; 1268 1269 for (int i = 0; i < type_size(ir->lhs->type); i++) { 1270 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); 1271 if (ir->condition) 1272 inst->predicate = BRW_PREDICATE_NORMAL; 1273 1274 dst.reg_offset++; 1275 src.reg_offset++; 1276 } 1277} 1278 1279void 1280vec4_visitor::visit(ir_assignment *ir) 1281{ 1282 if (!ir->lhs->type->is_scalar() && 1283 !ir->lhs->type->is_vector()) { 1284 emit_block_move(ir); 1285 return; 1286 } 1287 1288 /* Now we're down to just a scalar/vector with writemasks. */ 1289 int i; 1290 1291 ir->rhs->accept(this); 1292 src_reg src = this->result; 1293 1294 dst_reg dst = get_assignment_lhs(ir->lhs, this); 1295 1296 int swizzles[4]; 1297 int first_enabled_chan = 0; 1298 int src_chan = 0; 1299 1300 assert(ir->lhs->type->is_vector() || 1301 ir->lhs->type->is_scalar()); 1302 dst.writemask = ir->write_mask; 1303 1304 for (int i = 0; i < 4; i++) { 1305 if (dst.writemask & (1 << i)) { 1306 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); 1307 break; 1308 } 1309 } 1310 1311 /* Swizzle a small RHS vector into the channels being written. 1312 * 1313 * glsl ir treats write_mask as dictating how many channels are 1314 * present on the RHS while in our instructions we need to make 1315 * those channels appear in the slots of the vec4 they're written to. 
1316 */ 1317 for (int i = 0; i < 4; i++) { 1318 if (dst.writemask & (1 << i)) 1319 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); 1320 else 1321 swizzles[i] = first_enabled_chan; 1322 } 1323 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 1324 swizzles[2], swizzles[3]); 1325 1326 if (ir->condition) { 1327 emit_bool_to_cond_code(ir->condition); 1328 } 1329 1330 for (i = 0; i < type_size(ir->lhs->type); i++) { 1331 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); 1332 1333 if (ir->condition) 1334 inst->predicate = BRW_PREDICATE_NORMAL; 1335 1336 dst.reg_offset++; 1337 src.reg_offset++; 1338 } 1339} 1340 1341 1342void 1343vec4_visitor::visit(ir_constant *ir) 1344{ 1345 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 1346 src_reg temp_base = src_reg(this, ir->type); 1347 dst_reg temp = dst_reg(temp_base); 1348 1349 foreach_iter(exec_list_iterator, iter, ir->components) { 1350 ir_constant *field_value = (ir_constant *)iter.get(); 1351 int size = type_size(field_value->type); 1352 1353 assert(size > 0); 1354 1355 field_value->accept(this); 1356 src_reg src = this->result; 1357 1358 for (int i = 0; i < (unsigned int)size; i++) { 1359 emit(BRW_OPCODE_MOV, temp, src); 1360 1361 src.reg_offset++; 1362 temp.reg_offset++; 1363 } 1364 } 1365 this->result = temp_base; 1366 return; 1367 } 1368 1369 if (ir->type->is_array()) { 1370 src_reg temp_base = src_reg(this, ir->type); 1371 dst_reg temp = dst_reg(temp_base); 1372 int size = type_size(ir->type->fields.array); 1373 1374 assert(size > 0); 1375 1376 for (unsigned int i = 0; i < ir->type->length; i++) { 1377 ir->array_elements[i]->accept(this); 1378 src_reg src = this->result; 1379 for (int j = 0; j < size; j++) { 1380 emit(BRW_OPCODE_MOV, temp, src); 1381 1382 src.reg_offset++; 1383 temp.reg_offset++; 1384 } 1385 } 1386 this->result = temp_base; 1387 return; 1388 } 1389 1390 if (ir->type->is_matrix()) { 1391 this->result = src_reg(this, ir->type); 1392 dst_reg dst = dst_reg(this->result); 1393 1394 
assert(ir->type->base_type == GLSL_TYPE_FLOAT); 1395 1396 for (int i = 0; i < ir->type->matrix_columns; i++) { 1397 for (int j = 0; j < ir->type->vector_elements; j++) { 1398 dst.writemask = 1 << j; 1399 emit(BRW_OPCODE_MOV, dst, 1400 src_reg(ir->value.f[i * ir->type->vector_elements + j])); 1401 } 1402 dst.reg_offset++; 1403 } 1404 return; 1405 } 1406 1407 this->result = src_reg(this, ir->type); 1408 dst_reg dst = dst_reg(this->result); 1409 1410 for (int i = 0; i < ir->type->vector_elements; i++) { 1411 dst.writemask = 1 << i; 1412 1413 switch (ir->type->base_type) { 1414 case GLSL_TYPE_FLOAT: 1415 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i])); 1416 break; 1417 case GLSL_TYPE_INT: 1418 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i])); 1419 break; 1420 case GLSL_TYPE_UINT: 1421 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i])); 1422 break; 1423 case GLSL_TYPE_BOOL: 1424 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i])); 1425 break; 1426 default: 1427 assert(!"Non-float/uint/int/bool constant"); 1428 break; 1429 } 1430 } 1431} 1432 1433void 1434vec4_visitor::visit(ir_call *ir) 1435{ 1436 assert(!"not reached"); 1437} 1438 1439void 1440vec4_visitor::visit(ir_texture *ir) 1441{ 1442 assert(!"not reached"); 1443} 1444 1445void 1446vec4_visitor::visit(ir_return *ir) 1447{ 1448 assert(!"not reached"); 1449} 1450 1451void 1452vec4_visitor::visit(ir_discard *ir) 1453{ 1454 assert(!"not reached"); 1455} 1456 1457void 1458vec4_visitor::visit(ir_if *ir) 1459{ 1460 /* Don't point the annotation at the if statement, because then it plus 1461 * the then and else blocks get printed. 
1462 */ 1463 this->base_ir = ir->condition; 1464 1465 if (intel->gen == 6) { 1466 emit_if_gen6(ir); 1467 } else { 1468 emit_bool_to_cond_code(ir->condition); 1469 vec4_instruction *inst = emit(BRW_OPCODE_IF); 1470 inst->predicate = BRW_PREDICATE_NORMAL; 1471 } 1472 1473 visit_instructions(&ir->then_instructions); 1474 1475 if (!ir->else_instructions.is_empty()) { 1476 this->base_ir = ir->condition; 1477 emit(BRW_OPCODE_ELSE); 1478 1479 visit_instructions(&ir->else_instructions); 1480 } 1481 1482 this->base_ir = ir->condition; 1483 emit(BRW_OPCODE_ENDIF); 1484} 1485 1486int 1487vec4_visitor::emit_vue_header_gen4(int header_mrf) 1488{ 1489 /* Get the position */ 1490 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); 1491 1492 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ 1493 dst_reg ndc = dst_reg(this, glsl_type::vec4_type); 1494 1495 current_annotation = "NDC"; 1496 dst_reg ndc_w = ndc; 1497 ndc_w.writemask = WRITEMASK_W; 1498 src_reg pos_w = pos; 1499 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); 1500 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); 1501 1502 dst_reg ndc_xyz = ndc; 1503 ndc_xyz.writemask = WRITEMASK_XYZ; 1504 1505 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); 1506 1507 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || 1508 c->key.nr_userclip || brw->has_negative_rhw_bug) { 1509 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); 1510 GLuint i; 1511 1512 emit(BRW_OPCODE_MOV, header1, 0u); 1513 1514 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1515 assert(!"finishme: psiz"); 1516 src_reg psiz; 1517 1518 header1.writemask = WRITEMASK_W; 1519 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); 1520 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); 1521 } 1522 1523 for (i = 0; i < c->key.nr_userclip; i++) { 1524 vec4_instruction *inst; 1525 1526 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), 1527 pos, src_reg(c->userplane[i])); 1528 
inst->conditional_mod = BRW_CONDITIONAL_L; 1529 1530 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); 1531 inst->predicate = BRW_PREDICATE_NORMAL; 1532 } 1533 1534 /* i965 clipping workaround: 1535 * 1) Test for -ve rhw 1536 * 2) If set, 1537 * set ndc = (0,0,0,0) 1538 * set ucp[6] = 1 1539 * 1540 * Later, clipping will detect ucp[6] and ensure the primitive is 1541 * clipped against all fixed planes. 1542 */ 1543 if (brw->has_negative_rhw_bug) { 1544#if 0 1545 /* FINISHME */ 1546 brw_CMP(p, 1547 vec8(brw_null_reg()), 1548 BRW_CONDITIONAL_L, 1549 brw_swizzle1(ndc, 3), 1550 brw_imm_f(0)); 1551 1552 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); 1553 brw_MOV(p, ndc, brw_imm_f(0)); 1554 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1555#endif 1556 } 1557 1558 header1.writemask = WRITEMASK_XYZW; 1559 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); 1560 } else { 1561 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), 1562 BRW_REGISTER_TYPE_UD), 0u); 1563 } 1564 1565 if (intel->gen == 5) { 1566 /* There are 20 DWs (D0-D19) in VUE header on Ironlake: 1567 * dword 0-3 (m1) of the header is indices, point width, clip flags. 1568 * dword 4-7 (m2) is the ndc position (set above) 1569 * dword 8-11 (m3) of the vertex header is the 4D space position 1570 * dword 12-19 (m4,m5) of the vertex header is the user clip distance. 1571 * m6 is a pad so that the vertex element data is aligned 1572 * m7 is the first vertex data we fill. 1573 */ 1574 current_annotation = "NDC"; 1575 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1576 1577 current_annotation = "gl_Position"; 1578 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1579 1580 /* user clip distance. */ 1581 header_mrf += 2; 1582 1583 /* Pad so that vertex element data is aligned. */ 1584 header_mrf++; 1585 } else { 1586 /* There are 8 dwords in VUE header pre-Ironlake: 1587 * dword 0-3 (m1) is indices, point width, clip flags. 
1588 * dword 4-7 (m2) is ndc position (set above) 1589 * 1590 * dword 8-11 (m3) is the first vertex data. 1591 */ 1592 current_annotation = "NDC"; 1593 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1594 1595 current_annotation = "gl_Position"; 1596 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1597 } 1598 1599 return header_mrf; 1600} 1601 1602int 1603vec4_visitor::emit_vue_header_gen6(int header_mrf) 1604{ 1605 struct brw_reg reg; 1606 1607 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: 1608 * dword 0-3 (m2) of the header is indices, point width, clip flags. 1609 * dword 4-7 (m3) is the 4D space position 1610 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if 1611 * enabled. 1612 * 1613 * m4 or 6 is the first vertex element data we fill. 1614 */ 1615 1616 current_annotation = "indices, point width, clip flags"; 1617 reg = brw_message_reg(header_mrf++); 1618 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); 1619 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1620 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), 1621 src_reg(output_reg[VERT_RESULT_PSIZ])); 1622 } 1623 1624 current_annotation = "gl_Position"; 1625 emit(BRW_OPCODE_MOV, 1626 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); 1627 1628 current_annotation = "user clip distances"; 1629 if (c->key.nr_userclip) { 1630 for (int i = 0; i < c->key.nr_userclip; i++) { 1631 struct brw_reg m; 1632 if (i < 4) 1633 m = brw_message_reg(header_mrf); 1634 else 1635 m = brw_message_reg(header_mrf + 1); 1636 1637 emit(BRW_OPCODE_DP4, 1638 dst_reg(brw_writemask(m, 1 << (i & 3))), 1639 src_reg(c->userplane[i])); 1640 } 1641 header_mrf += 2; 1642 } 1643 1644 current_annotation = NULL; 1645 1646 return header_mrf; 1647} 1648 1649static int 1650align_interleaved_urb_mlen(struct brw_context *brw, int mlen) 1651{ 1652 struct intel_context *intel = &brw->intel; 1653 1654 if (intel->gen >= 6) { 
1655 /* URB data written (does not include the message header reg) must 1656 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, 1657 * section 5.4.3.2.2: URB_INTERLEAVED. 1658 * 1659 * URB entries are allocated on a multiple of 1024 bits, so an 1660 * extra 128 bits written here to make the end align to 256 is 1661 * no problem. 1662 */ 1663 if ((mlen % 2) != 1) 1664 mlen++; 1665 } 1666 1667 return mlen; 1668} 1669 1670/** 1671 * Generates the VUE payload plus the 1 or 2 URB write instructions to 1672 * complete the VS thread. 1673 * 1674 * The VUE layout is documented in Volume 2a. 1675 */ 1676void 1677vec4_visitor::emit_urb_writes() 1678{ 1679 int base_mrf = 1; 1680 int mrf = base_mrf; 1681 int urb_entry_size; 1682 1683 /* FINISHME: edgeflag */ 1684 1685 /* First mrf is the g0-based message header containing URB handles and such, 1686 * which is implied in VS_OPCODE_URB_WRITE. 1687 */ 1688 mrf++; 1689 1690 if (intel->gen >= 6) { 1691 mrf = emit_vue_header_gen6(mrf); 1692 } else { 1693 mrf = emit_vue_header_gen4(mrf); 1694 } 1695 1696 int attr; 1697 for (attr = 0; attr < VERT_RESULT_MAX; attr++) { 1698 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1699 continue; 1700 1701 /* This is set up in the VUE header. */ 1702 if (attr == VERT_RESULT_HPOS) 1703 continue; 1704 1705 /* This is loaded into the VUE header, and thus doesn't occupy 1706 * an attribute slot. 1707 */ 1708 if (attr == VERT_RESULT_PSIZ) 1709 continue; 1710 1711 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); 1712 1713 /* If this is MRF 15, we can't fit anything more into this URB 1714 * WRITE. Note that base_mrf of 1 means that MRF 15 is an 1715 * even-numbered amount of URB write data, which will meet 1716 * gen6's requirements for length alignment. 
1717 */ 1718 if (mrf == 15) 1719 break; 1720 } 1721 1722 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); 1723 inst->base_mrf = base_mrf; 1724 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); 1725 inst->eot = true; 1726 1727 urb_entry_size = mrf - base_mrf; 1728 1729 for (; attr < VERT_RESULT_MAX; attr++) { 1730 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1731 continue; 1732 fail("Second URB write not supported.\n"); 1733 break; 1734 } 1735 1736 if (intel->gen == 6) 1737 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; 1738 else 1739 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; 1740} 1741 1742vec4_visitor::vec4_visitor(struct brw_vs_compile *c, 1743 struct gl_shader_program *prog, 1744 struct brw_shader *shader) 1745{ 1746 this->c = c; 1747 this->p = &c->func; 1748 this->brw = p->brw; 1749 this->intel = &brw->intel; 1750 this->ctx = &intel->ctx; 1751 this->prog = prog; 1752 this->shader = shader; 1753 1754 this->mem_ctx = ralloc_context(NULL); 1755 this->failed = false; 1756 1757 this->base_ir = NULL; 1758 this->current_annotation = NULL; 1759 1760 this->c = c; 1761 this->vp = brw->vertex_program; /* FINISHME: change for precompile */ 1762 this->prog_data = &c->prog_data; 1763 1764 this->variable_ht = hash_table_ctor(0, 1765 hash_table_pointer_hash, 1766 hash_table_pointer_compare); 1767 1768 this->virtual_grf_sizes = NULL; 1769 this->virtual_grf_count = 0; 1770 this->virtual_grf_array_size = 0; 1771 1772 this->uniforms = 0; 1773 1774 this->variable_ht = hash_table_ctor(0, 1775 hash_table_pointer_hash, 1776 hash_table_pointer_compare); 1777} 1778 1779vec4_visitor::~vec4_visitor() 1780{ 1781 hash_table_dtor(this->variable_ht); 1782} 1783 1784 1785void 1786vec4_visitor::fail(const char *format, ...) 
1787{ 1788 va_list va; 1789 char *msg; 1790 1791 if (failed) 1792 return; 1793 1794 failed = true; 1795 1796 va_start(va, format); 1797 msg = ralloc_vasprintf(mem_ctx, format, va); 1798 va_end(va); 1799 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); 1800 1801 this->fail_msg = msg; 1802 1803 if (INTEL_DEBUG & DEBUG_VS) { 1804 fprintf(stderr, "%s", msg); 1805 } 1806} 1807 1808} /* namespace brw */ 1809