brw_vec4_visitor.cpp revision 4a4857246c79c42d918a84d7e28e9afff3a9ef6d
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 */

#include "brw_vec4.h"
extern "C" {
#include "main/macros.h"
#include "program/prog_parameter.h"
}

namespace brw {

/* Convert a dst_reg into a src_reg, deriving a swizzle from the writemask:
 * enabled channels are packed in order and the last enabled channel is
 * replicated into the remaining swizzle slots.
 */
src_reg::src_reg(dst_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;

   int swizzles[4];
   int next_chan = 0;
   int last = 0;

   /* Collect the channels enabled in the writemask, remembering the last. */
   for (int i = 0; i < 4; i++) {
      if (!(reg.writemask & (1 << i)))
	 continue;

      swizzles[next_chan++] = last = i;
   }

   /* Replicate the last enabled channel into the unused slots. */
   for (; next_chan < 4; next_chan++) {
      swizzles[next_chan] = last;
   }

   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
				swizzles[2], swizzles[3]);
}

/* Convert a src_reg into a dst_reg.  The swizzle is dropped; the
 * destination writes all four channels.
 */
dst_reg::dst_reg(src_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->writemask = WRITEMASK_XYZW;
}

/* Allocate a vec4_instruction, fill in its fields, tag it with the IR
 * node and annotation currently being processed (for debug output), and
 * append it to the instruction stream.
 */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst,
		   src_reg src0, src_reg src1, src_reg src2)
{
   vec4_instruction *inst = new(mem_ctx) vec4_instruction();

   inst->opcode = opcode;
   inst->dst = dst;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->ir = this->base_ir;
   inst->annotation = this->current_annotation;

   this->instructions.push_tail(inst);

   return inst;
}


/* Two-source convenience wrapper around the three-source emit(). */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(opcode, dst, src0, src1, src_reg());
}

/* One-source convenience wrapper around the three-source emit(). */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
{
   assert(dst.writemask != 0);
   return emit(opcode, dst, src0, src_reg(), src_reg());
}

/* Zero-operand convenience wrapper (flow control opcodes like DO/WHILE). */
vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
}

/* Emit a dot product of the requested width (2, 3 or 4 elements). */
void
vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
{
   static enum opcode dot_opcodes[] = {
      BRW_OPCODE_DP2,
BRW_OPCODE_DP3, BRW_OPCODE_DP4 115 }; 116 117 emit(dot_opcodes[elements - 2], dst, src0, src1); 118} 119 120void 121vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) 122{ 123 /* The gen6 math instruction ignores the source modifiers -- 124 * swizzle, abs, negate, and at least some parts of the register 125 * region description. Move the source to the corresponding slots 126 * of the destination generally work. 127 */ 128 src_reg expanded = src_reg(this, glsl_type::float_type); 129 emit(BRW_OPCODE_MOV, dst, src); 130 src = expanded; 131 132 emit(opcode, dst, src); 133} 134 135void 136vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) 137{ 138 vec4_instruction *inst = emit(opcode, dst, src); 139 inst->base_mrf = 1; 140 inst->mlen = 1; 141} 142 143void 144vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) 145{ 146 switch (opcode) { 147 case SHADER_OPCODE_RCP: 148 case SHADER_OPCODE_RSQ: 149 case SHADER_OPCODE_SQRT: 150 case SHADER_OPCODE_EXP2: 151 case SHADER_OPCODE_LOG2: 152 case SHADER_OPCODE_SIN: 153 case SHADER_OPCODE_COS: 154 break; 155 default: 156 assert(!"not reached: bad math opcode"); 157 return; 158 } 159 160 if (intel->gen >= 6) { 161 return emit_math1_gen6(opcode, dst, src); 162 } else { 163 return emit_math1_gen4(opcode, dst, src); 164 } 165} 166 167void 168vec4_visitor::emit_math2_gen6(enum opcode opcode, 169 dst_reg dst, src_reg src0, src_reg src1) 170{ 171 src_reg expanded; 172 173 /* The gen6 math instruction ignores the source modifiers -- 174 * swizzle, abs, negate, and at least some parts of the register 175 * region description. Move the sources to temporaries to make it 176 * generally work. 
177 */ 178 179 expanded = src_reg(this, glsl_type::vec4_type); 180 emit(BRW_OPCODE_MOV, dst, src0); 181 src0 = expanded; 182 183 expanded = src_reg(this, glsl_type::vec4_type); 184 emit(BRW_OPCODE_MOV, dst, src1); 185 src1 = expanded; 186 187 emit(opcode, dst, src0, src1); 188} 189 190void 191vec4_visitor::emit_math2_gen4(enum opcode opcode, 192 dst_reg dst, src_reg src0, src_reg src1) 193{ 194 vec4_instruction *inst = emit(opcode, dst, src0, src1); 195 inst->base_mrf = 1; 196 inst->mlen = 2; 197} 198 199void 200vec4_visitor::emit_math(enum opcode opcode, 201 dst_reg dst, src_reg src0, src_reg src1) 202{ 203 assert(opcode == SHADER_OPCODE_POW); 204 205 if (intel->gen >= 6) { 206 return emit_math2_gen6(opcode, dst, src0, src1); 207 } else { 208 return emit_math2_gen4(opcode, dst, src0, src1); 209 } 210} 211 212void 213vec4_visitor::visit_instructions(const exec_list *list) 214{ 215 foreach_iter(exec_list_iterator, iter, *list) { 216 ir_instruction *ir = (ir_instruction *)iter.get(); 217 218 base_ir = ir; 219 ir->accept(this); 220 } 221} 222 223 224static int 225type_size(const struct glsl_type *type) 226{ 227 unsigned int i; 228 int size; 229 230 switch (type->base_type) { 231 case GLSL_TYPE_UINT: 232 case GLSL_TYPE_INT: 233 case GLSL_TYPE_FLOAT: 234 case GLSL_TYPE_BOOL: 235 if (type->is_matrix()) { 236 return type->matrix_columns; 237 } else { 238 /* Regardless of size of vector, it gets a vec4. This is bad 239 * packing for things like floats, but otherwise arrays become a 240 * mess. Hopefully a later pass over the code can pack scalars 241 * down if appropriate. 
	  */
	 return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      /* A struct is the sum of its members' sizes. */
      size = 0;
      for (i = 0; i < type->length; i++) {
	 size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
       */
      return 1;
   default:
      assert(0);
      return 0;
   }
}

/* Allocate a virtual GRF of the given size (in vec4 slots) and return
 * its index.  The size array grows by doubling as needed.
 */
int
vec4_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_count) {
      if (virtual_grf_array_size == 0)
	 virtual_grf_array_size = 16;
      else
	 virtual_grf_array_size *= 2;
      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
				   virtual_grf_array_size);
   }
   virtual_grf_sizes[virtual_grf_count] = size;
   return virtual_grf_count++;
}

/* Allocate a fresh virtual GRF sized for the given GLSL type, with a
 * swizzle matching the type's element count.
 */
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->swizzle = BRW_SWIZZLE_NOOP;
   } else {
      this->swizzle = swizzle_for_size(type->vector_elements);
   }

   this->type = brw_type_for_base_type(type);
}

/* Allocate a fresh virtual GRF sized for the given GLSL type, with a
 * writemask covering the type's element count.
 */
dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->writemask = WRITEMASK_XYZW;
   } else {
      this->writemask = (1 << type->vector_elements) - 1;
   }

   this->type = brw_type_for_base_type(type);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
/* Recursively set up prog_data.param[] pointers (and float conversion
 * flags) for a uniform of the given type starting at parameter slot
 * loc.  Returns the number of parameter slots consumed.
 */
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   if (type->is_matrix()) {
      /* Handle a matrix as matrix_columns column vectors. */
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
							type->vector_elements,
							1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
	 offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->vector_elements; i++) {
	 int slot = this->uniforms * 4 + i;
	 /* Parameter values are stored as floats; record how each slot
	  * must be converted back to its real type at upload time.
	  */
	 switch (type->base_type) {
	 case GLSL_TYPE_FLOAT:
	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
	    break;
	 case GLSL_TYPE_UINT:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
	    break;
	 case GLSL_TYPE_INT:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
	    break;
	 case GLSL_TYPE_BOOL:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
	    break;
	 default:
	    assert(!"not reached");
	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
	    break;
	 }
	 c->prog_data.param[slot] = &values[i];
      }

      /* Pad the remaining channels of the vec4 slot with zeros. */
      for (unsigned int i = type->vector_elements; i < 4; i++) {
	 c->prog_data.param_convert[this->uniforms * 4 + i] =
	    PARAM_CONVERT_ZERO;
	 c->prog_data.param[this->uniforms * 4 + i] = NULL;
      }

      this->uniform_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset,
					type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}

/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
					    (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
	 int swiz = GET_SWZ(slots[i].swizzle, j);
	 /* A repeated channel marks the end of the unique channels. */
	 if (swiz == last_swiz)
	    break;
	 last_swiz = swiz;

	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
	 this->uniform_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}

/* Look up the register previously allocated for an ir_variable, or NULL. */
dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
   return (dst_reg *)hash_table_find(this->variable_ht, var);
}

/* Emit instructions that evaluate a boolean rvalue and leave the result
 * in the flag register (as a conditional_mod), for use by a following
 * predicated instruction.  Recognized boolean expressions are folded
 * directly into the flag-generating instruction.
 */
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
	 assert(expr->operands[i]->type->is_scalar());

	 expr->operands[i]->accept(this);
	 op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
	 /* Test bit 0 and set the flag when it's zero. */
	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;

      case ir_binop_logic_xor:
	 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_logic_or:
	 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_logic_and:
	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_unop_f2b:
	 if (intel->gen >= 6) {
	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
	 } else {
	    inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
	 }
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_unop_i2b:
	 if (intel->gen >= 6) {
	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
	 } else {
	    inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
	 }
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
	 /* Fold the comparison directly into the flag generation. */
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod =
	    brw_conditional_for_comparison(expr->operation);
	 break;

      default:
	 assert(!"not reached");
	 break;
      }
      return;
   }

   /* Not a recognized expression: evaluate it and test bit 0. */
   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
				    this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}

/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;
      dst_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
	 assert(expr->operands[i]->type->is_scalar());

	 expr->operands[i]->accept(this);
	 op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 return;

      case ir_binop_logic_xor:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_logic_or:
	 /* OR/AND need a temporary since IF only compares two sources. */
	 temp = dst_reg(this, glsl_type::bool_type);
	 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_logic_and:
	 temp = dst_reg(this, glsl_type::bool_type);
	 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_unop_f2b:
	 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_unop_i2b:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod =
	    brw_conditional_for_comparison(expr->operation);
	 return;
      default:
	 assert(!"not reached");
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;
      }
      return;
   }

   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
				 this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}

/* Allocate storage for a variable the first time it is seen, keyed by
 * its storage class (attribute, output, temporary, uniform).
 */
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      /* Record each slot of the output so the URB writes can find it. */
      for (int i = 0; i < type_size(ir->type); i++) {
	 output_reg[ir->location + i] = *reg;
	 output_reg[ir->location + i].reg_offset = i;
      }
      break;

   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      /* Built-in uniforms (gl_*) come from GL state; others from the
       * program's parameter list.
       */
      if (!strncmp(ir->name, "gl_", 3)) {
	 setup_builtin_uniform_values(ir);
      } else {
	 setup_uniform_values(ir->location, ir->type);
      }
      break;
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}

/* Emit a loop as DO ... (conditional BREAK) ... body ... increment ... WHILE,
 * synthesizing IR for the counter initialization, bound test and increment.
 */
void
vec4_visitor::visit(ir_loop *ir)
{
   ir_dereference_variable *counter = NULL;

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL)
      counter = new(ir) ir_dereference_variable(ir->counter);

   if (ir->from != NULL) {
      assert(ir->counter != NULL);

      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);

      a->accept(this);
      delete a;
   }

   emit(BRW_OPCODE_DO);

   if (ir->to) {
      /* Emit "if (counter cmp to) break;" at the top of the loop. */
      ir_expression *e =
	 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
			       counter, ir->to);
      ir_if *if_stmt = new(ir) ir_if(e);

      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);

      if_stmt->then_instructions.push_tail(brk);

      if_stmt->accept(this);

      delete if_stmt;
      delete e;
      delete brk;
   }

   visit_instructions(&ir->body_instructions);

   if (ir->increment) {
      /* counter = counter + increment at the bottom of the loop. */
      ir_expression *e =
	 new(ir) ir_expression(ir_binop_add, counter->type,
			       counter, ir->increment);

      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);

      a->accept(this);
      delete a;
      delete e;
   }

   emit(BRW_OPCODE_WHILE);
}

void
vec4_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}


void
vec4_visitor::visit(ir_function_signature *ir)
{
   /* Signatures are only reached through visit(ir_function) below. */
   assert(0);
   (void)ir;
}

void
vec4_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      visit_instructions(&sig->body);
   }
}

/* If the expression is a to-be-saturated rvalue, emit it through a
 * saturating MOV and return true; otherwise return false.
 */
GLboolean
vec4_visitor::try_emit_sat(ir_expression *ir)
{
   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   src_reg src = this->result;

   this->result = src_reg(this, ir->type);
   vec4_instruction *inst;
   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
   inst->saturate = true;

   return true;
}

/* Emit a comparison producing a 0/1 boolean in dst. */
void
vec4_visitor::emit_bool_comparison(unsigned int op,
				   dst_reg dst, src_reg src0, src_reg src1)
{
   /* original gen4 does destination conversion before comparison. */
   if (intel->gen < 5)
      dst.type = src0.type;

   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
   inst->conditional_mod = brw_conditional_for_comparison(op);

   /* Mask the all-ones CMP result down to bit 0. */
   dst.type = BRW_REGISTER_TYPE_D;
   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
}

void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   if (try_emit_sat(ir))
      return;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
	 printf("Failed to get tree for expression operand:\n");
	 ir->operands[operand]->print();
	 exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
			     ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
840 */ 841 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); 842 break; 843 case ir_unop_neg: 844 op[0].negate = !op[0].negate; 845 this->result = op[0]; 846 break; 847 case ir_unop_abs: 848 op[0].abs = true; 849 op[0].negate = false; 850 this->result = op[0]; 851 break; 852 853 case ir_unop_sign: 854 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); 855 856 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 857 inst->conditional_mod = BRW_CONDITIONAL_G; 858 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); 859 inst->predicate = BRW_PREDICATE_NORMAL; 860 861 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 862 inst->conditional_mod = BRW_CONDITIONAL_L; 863 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); 864 inst->predicate = BRW_PREDICATE_NORMAL; 865 866 break; 867 868 case ir_unop_rcp: 869 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); 870 break; 871 872 case ir_unop_exp2: 873 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); 874 break; 875 case ir_unop_log2: 876 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); 877 break; 878 case ir_unop_exp: 879 case ir_unop_log: 880 assert(!"not reached: should be handled by ir_explog_to_explog2"); 881 break; 882 case ir_unop_sin: 883 case ir_unop_sin_reduced: 884 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); 885 break; 886 case ir_unop_cos: 887 case ir_unop_cos_reduced: 888 emit_math(SHADER_OPCODE_COS, result_dst, op[0]); 889 break; 890 891 case ir_unop_dFdx: 892 case ir_unop_dFdy: 893 assert(!"derivatives not valid in vertex shader"); 894 break; 895 896 case ir_unop_noise: 897 assert(!"not reached: should be handled by lower_noise"); 898 break; 899 900 case ir_binop_add: 901 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); 902 break; 903 case ir_binop_sub: 904 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 905 break; 906 907 case ir_binop_mul: 908 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); 909 break; 910 case ir_binop_div: 911 assert(!"not reached: 
should be handled by ir_div_to_mul_rcp"); 912 case ir_binop_mod: 913 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 914 break; 915 916 case ir_binop_less: 917 case ir_binop_greater: 918 case ir_binop_lequal: 919 case ir_binop_gequal: 920 case ir_binop_equal: 921 case ir_binop_nequal: { 922 dst_reg temp = result_dst; 923 /* original gen4 does implicit conversion before comparison. */ 924 if (intel->gen < 5) 925 temp.type = op[0].type; 926 927 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 928 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 929 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); 930 break; 931 } 932 933 case ir_binop_all_equal: 934 /* "==" operator producing a scalar boolean. */ 935 if (ir->operands[0]->type->is_vector() || 936 ir->operands[1]->type->is_vector()) { 937 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 938 inst->conditional_mod = BRW_CONDITIONAL_Z; 939 940 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 941 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 942 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 943 } else { 944 dst_reg temp = result_dst; 945 /* original gen4 does implicit conversion before comparison. */ 946 if (intel->gen < 5) 947 temp.type = op[0].type; 948 949 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 950 inst->conditional_mod = BRW_CONDITIONAL_NZ; 951 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 952 } 953 break; 954 case ir_binop_any_nequal: 955 /* "!=" operator producing a scalar boolean. 
*/ 956 if (ir->operands[0]->type->is_vector() || 957 ir->operands[1]->type->is_vector()) { 958 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 959 inst->conditional_mod = BRW_CONDITIONAL_NZ; 960 961 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 962 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 963 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 964 } else { 965 dst_reg temp = result_dst; 966 /* original gen4 does implicit conversion before comparison. */ 967 if (intel->gen < 5) 968 temp.type = op[0].type; 969 970 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 971 inst->conditional_mod = BRW_CONDITIONAL_NZ; 972 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 973 } 974 break; 975 976 case ir_unop_any: 977 emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 978 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 979 980 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 981 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 982 break; 983 984 case ir_binop_logic_xor: 985 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 986 break; 987 988 case ir_binop_logic_or: 989 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 990 break; 991 992 case ir_binop_logic_and: 993 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 994 break; 995 996 case ir_binop_dot: 997 assert(ir->operands[0]->type->is_vector()); 998 assert(ir->operands[0]->type == ir->operands[1]->type); 999 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); 1000 break; 1001 1002 case ir_unop_sqrt: 1003 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); 1004 break; 1005 case ir_unop_rsq: 1006 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); 1007 break; 1008 case ir_unop_i2f: 1009 case ir_unop_i2u: 1010 case ir_unop_u2i: 1011 case ir_unop_u2f: 1012 case ir_unop_b2f: 1013 case ir_unop_b2i: 1014 case ir_unop_f2i: 1015 emit(BRW_OPCODE_MOV, result_dst, op[0]); 1016 break; 1017 case ir_unop_f2b: 1018 case ir_unop_i2b: { 1019 dst_reg temp = result_dst; 1020 /* original gen4 does 
implicit conversion before comparison. */ 1021 if (intel->gen < 5) 1022 temp.type = op[0].type; 1023 1024 inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); 1025 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1026 inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); 1027 break; 1028 } 1029 1030 case ir_unop_trunc: 1031 emit(BRW_OPCODE_RNDZ, result_dst, op[0]); 1032 break; 1033 case ir_unop_ceil: 1034 op[0].negate = !op[0].negate; 1035 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1036 this->result.negate = true; 1037 break; 1038 case ir_unop_floor: 1039 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1040 break; 1041 case ir_unop_fract: 1042 inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); 1043 break; 1044 case ir_unop_round_even: 1045 emit(BRW_OPCODE_RNDE, result_dst, op[0]); 1046 break; 1047 1048 case ir_binop_min: 1049 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1050 inst->conditional_mod = BRW_CONDITIONAL_L; 1051 1052 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1053 inst->predicate = BRW_PREDICATE_NORMAL; 1054 break; 1055 case ir_binop_max: 1056 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1057 inst->conditional_mod = BRW_CONDITIONAL_G; 1058 1059 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1060 inst->predicate = BRW_PREDICATE_NORMAL; 1061 break; 1062 1063 case ir_binop_pow: 1064 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); 1065 break; 1066 1067 case ir_unop_bit_not: 1068 inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); 1069 break; 1070 case ir_binop_bit_and: 1071 inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1072 break; 1073 case ir_binop_bit_xor: 1074 inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1075 break; 1076 case ir_binop_bit_or: 1077 inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1078 break; 1079 1080 case ir_binop_lshift: 1081 case ir_binop_rshift: 1082 assert(!"GLSL 1.30 features unsupported"); 1083 break; 1084 1085 case ir_quadop_vector: 1086 assert(!"not 
reached: should be handled by lower_quadop_vector"); 1087 break; 1088 } 1089} 1090 1091 1092void 1093vec4_visitor::visit(ir_swizzle *ir) 1094{ 1095 src_reg src; 1096 int i = 0; 1097 int swizzle[4]; 1098 1099 /* Note that this is only swizzles in expressions, not those on the left 1100 * hand side of an assignment, which do write masking. See ir_assignment 1101 * for that. 1102 */ 1103 1104 ir->val->accept(this); 1105 src = this->result; 1106 assert(src.file != BAD_FILE); 1107 1108 if (i < ir->type->vector_elements) { 1109 switch (i) { 1110 case 0: 1111 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); 1112 break; 1113 case 1: 1114 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); 1115 break; 1116 case 2: 1117 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); 1118 break; 1119 case 3: 1120 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); 1121 break; 1122 } 1123 } 1124 for (; i < 4; i++) { 1125 /* Replicate the last channel out. */ 1126 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1127 } 1128 1129 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1130 1131 this->result = src; 1132} 1133 1134void 1135vec4_visitor::visit(ir_dereference_variable *ir) 1136{ 1137 dst_reg *reg = variable_storage(ir->var); 1138 1139 if (!reg) { 1140 fail("Failed to find variable storage for %s\n", ir->var->name); 1141 this->result = src_reg(brw_null_reg()); 1142 return; 1143 } 1144 1145 this->result = src_reg(*reg); 1146} 1147 1148void 1149vec4_visitor::visit(ir_dereference_array *ir) 1150{ 1151 ir_constant *constant_index; 1152 src_reg src; 1153 int element_size = type_size(ir->type); 1154 1155 constant_index = ir->array_index->constant_expression_value(); 1156 1157 ir->array->accept(this); 1158 src = this->result; 1159 1160 if (constant_index) { 1161 src.reg_offset += constant_index->value.i[0] * element_size; 1162 } else { 1163#if 0 /* Variable array index */ 1164 /* Variable index array dereference. 
It eats the "vec4" of the 1165 * base of the array and an index that offsets the Mesa register 1166 * index. 1167 */ 1168 ir->array_index->accept(this); 1169 1170 src_reg index_reg; 1171 1172 if (element_size == 1) { 1173 index_reg = this->result; 1174 } else { 1175 index_reg = src_reg(this, glsl_type::float_type); 1176 1177 emit(BRW_OPCODE_MUL, dst_reg(index_reg), 1178 this->result, src_reg_for_float(element_size)); 1179 } 1180 1181 src.reladdr = ralloc(mem_ctx, src_reg); 1182 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1183#endif 1184 } 1185 1186 /* If the type is smaller than a vec4, replicate the last channel out. */ 1187 if (ir->type->is_scalar() || ir->type->is_vector()) 1188 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1189 else 1190 src.swizzle = BRW_SWIZZLE_NOOP; 1191 1192 this->result = src; 1193} 1194 1195void 1196vec4_visitor::visit(ir_dereference_record *ir) 1197{ 1198 unsigned int i; 1199 const glsl_type *struct_type = ir->record->type; 1200 int offset = 0; 1201 1202 ir->record->accept(this); 1203 1204 for (i = 0; i < struct_type->length; i++) { 1205 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 1206 break; 1207 offset += type_size(struct_type->fields.structure[i].type); 1208 } 1209 1210 /* If the type is smaller than a vec4, replicate the last channel out. */ 1211 if (ir->type->is_scalar() || ir->type->is_vector()) 1212 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1213 else 1214 this->result.swizzle = BRW_SWIZZLE_NOOP; 1215 1216 this->result.reg_offset += offset; 1217} 1218 1219/** 1220 * We want to be careful in assignment setup to hit the actual storage 1221 * instead of potentially using a temporary like we might with the 1222 * ir_dereference handler. 1223 */ 1224static dst_reg 1225get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) 1226{ 1227 /* The LHS must be a dereference. 
If the LHS is a variable indexed array 1228 * access of a vector, it must be separated into a series conditional moves 1229 * before reaching this point (see ir_vec_index_to_cond_assign). 1230 */ 1231 assert(ir->as_dereference()); 1232 ir_dereference_array *deref_array = ir->as_dereference_array(); 1233 if (deref_array) { 1234 assert(!deref_array->array->type->is_vector()); 1235 } 1236 1237 /* Use the rvalue deref handler for the most part. We'll ignore 1238 * swizzles in it and write swizzles using writemask, though. 1239 */ 1240 ir->accept(v); 1241 return dst_reg(v->result); 1242} 1243 1244void 1245vec4_visitor::emit_block_move(ir_assignment *ir) 1246{ 1247 ir->rhs->accept(this); 1248 src_reg src = this->result; 1249 1250 dst_reg dst = get_assignment_lhs(ir->lhs, this); 1251 1252 /* FINISHME: This should really set to the correct maximal writemask for each 1253 * FINISHME: component written (in the loops below). 1254 */ 1255 dst.writemask = WRITEMASK_XYZW; 1256 1257 for (int i = 0; i < type_size(ir->lhs->type); i++) { 1258 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); 1259 if (ir->condition) 1260 inst->predicate = BRW_PREDICATE_NORMAL; 1261 1262 dst.reg_offset++; 1263 src.reg_offset++; 1264 } 1265} 1266 1267void 1268vec4_visitor::visit(ir_assignment *ir) 1269{ 1270 if (!ir->lhs->type->is_scalar() && 1271 !ir->lhs->type->is_vector()) { 1272 emit_block_move(ir); 1273 return; 1274 } 1275 1276 /* Now we're down to just a scalar/vector with writemasks. 
*/ 1277 int i; 1278 1279 ir->rhs->accept(this); 1280 src_reg src = this->result; 1281 1282 dst_reg dst = get_assignment_lhs(ir->lhs, this); 1283 1284 int swizzles[4]; 1285 int first_enabled_chan = 0; 1286 int src_chan = 0; 1287 1288 assert(ir->lhs->type->is_vector()); 1289 dst.writemask = ir->write_mask; 1290 1291 for (int i = 0; i < 4; i++) { 1292 if (dst.writemask & (1 << i)) { 1293 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); 1294 break; 1295 } 1296 } 1297 1298 /* Swizzle a small RHS vector into the channels being written. 1299 * 1300 * glsl ir treats write_mask as dictating how many channels are 1301 * present on the RHS while in our instructions we need to make 1302 * those channels appear in the slots of the vec4 they're written to. 1303 */ 1304 for (int i = 0; i < 4; i++) { 1305 if (dst.writemask & (1 << i)) 1306 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); 1307 else 1308 swizzles[i] = first_enabled_chan; 1309 } 1310 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 1311 swizzles[2], swizzles[3]); 1312 1313 if (ir->condition) { 1314 emit_bool_to_cond_code(ir->condition); 1315 } 1316 1317 for (i = 0; i < type_size(ir->lhs->type); i++) { 1318 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); 1319 1320 if (ir->condition) 1321 inst->predicate = BRW_PREDICATE_NORMAL; 1322 1323 dst.reg_offset++; 1324 src.reg_offset++; 1325 } 1326} 1327 1328 1329void 1330vec4_visitor::visit(ir_constant *ir) 1331{ 1332 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 1333 src_reg temp_base = src_reg(this, ir->type); 1334 dst_reg temp = dst_reg(temp_base); 1335 1336 foreach_iter(exec_list_iterator, iter, ir->components) { 1337 ir_constant *field_value = (ir_constant *)iter.get(); 1338 int size = type_size(field_value->type); 1339 1340 assert(size > 0); 1341 1342 field_value->accept(this); 1343 src_reg src = this->result; 1344 1345 for (int i = 0; i < (unsigned int)size; i++) { 1346 emit(BRW_OPCODE_MOV, temp, src); 1347 1348 src.reg_offset++; 1349 
temp.reg_offset++; 1350 } 1351 } 1352 this->result = temp_base; 1353 return; 1354 } 1355 1356 if (ir->type->is_array()) { 1357 src_reg temp_base = src_reg(this, ir->type); 1358 dst_reg temp = dst_reg(temp_base); 1359 int size = type_size(ir->type->fields.array); 1360 1361 assert(size > 0); 1362 1363 for (unsigned int i = 0; i < ir->type->length; i++) { 1364 ir->array_elements[i]->accept(this); 1365 src_reg src = this->result; 1366 for (int j = 0; j < size; j++) { 1367 emit(BRW_OPCODE_MOV, temp, src); 1368 1369 src.reg_offset++; 1370 temp.reg_offset++; 1371 } 1372 } 1373 this->result = temp_base; 1374 return; 1375 } 1376 1377 if (ir->type->is_matrix()) { 1378 this->result = src_reg(this, ir->type); 1379 dst_reg dst = dst_reg(this->result); 1380 1381 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 1382 1383 for (int i = 0; i < ir->type->matrix_columns; i++) { 1384 for (int j = 0; j < ir->type->vector_elements; j++) { 1385 dst.writemask = 1 << j; 1386 emit(BRW_OPCODE_MOV, dst, 1387 src_reg(ir->value.f[i * ir->type->vector_elements + j])); 1388 } 1389 dst.reg_offset++; 1390 } 1391 return; 1392 } 1393 1394 this->result = src_reg(this, ir->type); 1395 dst_reg dst = dst_reg(this->result); 1396 1397 for (int i = 0; i < ir->type->vector_elements; i++) { 1398 dst.writemask = 1 << i; 1399 1400 switch (ir->type->base_type) { 1401 case GLSL_TYPE_FLOAT: 1402 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i])); 1403 break; 1404 case GLSL_TYPE_INT: 1405 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i])); 1406 break; 1407 case GLSL_TYPE_UINT: 1408 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i])); 1409 break; 1410 case GLSL_TYPE_BOOL: 1411 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i])); 1412 break; 1413 default: 1414 assert(!"Non-float/uint/int/bool constant"); 1415 break; 1416 } 1417 } 1418} 1419 1420void 1421vec4_visitor::visit(ir_call *ir) 1422{ 1423 assert(!"not reached"); 1424} 1425 1426void 1427vec4_visitor::visit(ir_texture *ir) 1428{ 1429 assert(!"not reached"); 1430} 
1431 1432void 1433vec4_visitor::visit(ir_return *ir) 1434{ 1435 assert(!"not reached"); 1436} 1437 1438void 1439vec4_visitor::visit(ir_discard *ir) 1440{ 1441 assert(!"not reached"); 1442} 1443 1444void 1445vec4_visitor::visit(ir_if *ir) 1446{ 1447 this->base_ir = ir->condition; 1448 ir->condition->accept(this); 1449 assert(this->result.file != BAD_FILE); 1450 1451 /* FINISHME: condcode */ 1452 emit(BRW_OPCODE_IF); 1453 1454 visit_instructions(&ir->then_instructions); 1455 1456 if (!ir->else_instructions.is_empty()) { 1457 this->base_ir = ir->condition; 1458 emit(BRW_OPCODE_ELSE); 1459 1460 visit_instructions(&ir->else_instructions); 1461 } 1462 1463 this->base_ir = ir->condition; 1464 emit(BRW_OPCODE_ENDIF); 1465} 1466 1467int 1468vec4_visitor::emit_vue_header_gen4(int header_mrf) 1469{ 1470 /* Get the position */ 1471 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); 1472 1473 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ 1474 dst_reg ndc = dst_reg(this, glsl_type::vec4_type); 1475 1476 current_annotation = "NDC"; 1477 dst_reg ndc_w = ndc; 1478 ndc_w.writemask = WRITEMASK_W; 1479 src_reg pos_w = pos; 1480 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); 1481 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); 1482 1483 dst_reg ndc_xyz = ndc; 1484 ndc_xyz.writemask = WRITEMASK_XYZ; 1485 1486 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); 1487 1488 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || 1489 c->key.nr_userclip || brw->has_negative_rhw_bug) { 1490 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); 1491 GLuint i; 1492 1493 emit(BRW_OPCODE_MOV, header1, 0u); 1494 1495 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1496 assert(!"finishme: psiz"); 1497 src_reg psiz; 1498 1499 header1.writemask = WRITEMASK_W; 1500 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); 1501 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); 1502 } 1503 1504 for (i = 0; i < 
c->key.nr_userclip; i++) { 1505 vec4_instruction *inst; 1506 1507 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), 1508 pos, src_reg(c->userplane[i])); 1509 inst->conditional_mod = BRW_CONDITIONAL_L; 1510 1511 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); 1512 inst->predicate = BRW_PREDICATE_NORMAL; 1513 } 1514 1515 /* i965 clipping workaround: 1516 * 1) Test for -ve rhw 1517 * 2) If set, 1518 * set ndc = (0,0,0,0) 1519 * set ucp[6] = 1 1520 * 1521 * Later, clipping will detect ucp[6] and ensure the primitive is 1522 * clipped against all fixed planes. 1523 */ 1524 if (brw->has_negative_rhw_bug) { 1525#if 0 1526 /* FINISHME */ 1527 brw_CMP(p, 1528 vec8(brw_null_reg()), 1529 BRW_CONDITIONAL_L, 1530 brw_swizzle1(ndc, 3), 1531 brw_imm_f(0)); 1532 1533 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); 1534 brw_MOV(p, ndc, brw_imm_f(0)); 1535 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1536#endif 1537 } 1538 1539 header1.writemask = WRITEMASK_XYZW; 1540 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); 1541 } else { 1542 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), 1543 BRW_REGISTER_TYPE_UD), 0u); 1544 } 1545 1546 if (intel->gen == 5) { 1547 /* There are 20 DWs (D0-D19) in VUE header on Ironlake: 1548 * dword 0-3 (m1) of the header is indices, point width, clip flags. 1549 * dword 4-7 (m2) is the ndc position (set above) 1550 * dword 8-11 (m3) of the vertex header is the 4D space position 1551 * dword 12-19 (m4,m5) of the vertex header is the user clip distance. 1552 * m6 is a pad so that the vertex element data is aligned 1553 * m7 is the first vertex data we fill, which is the vertex position. 1554 */ 1555 current_annotation = "NDC"; 1556 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1557 1558 current_annotation = "gl_Position"; 1559 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1560 1561 /* user clip distance. 
*/ 1562 header_mrf += 2; 1563 1564 /* Pad so that vertex element data (starts with position) is aligned. */ 1565 header_mrf++; 1566 } else { 1567 /* There are 8 dwords in VUE header pre-Ironlake: 1568 * dword 0-3 (m1) is indices, point width, clip flags. 1569 * dword 4-7 (m2) is ndc position (set above) 1570 * 1571 * dword 8-11 (m3) is the first vertex data, which we always have be the 1572 * vertex position. 1573 */ 1574 current_annotation = "NDC"; 1575 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1576 1577 current_annotation = "gl_Position"; 1578 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1579 } 1580 1581 return header_mrf; 1582} 1583 1584int 1585vec4_visitor::emit_vue_header_gen6(int header_mrf) 1586{ 1587 struct brw_reg reg; 1588 1589 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: 1590 * dword 0-3 (m2) of the header is indices, point width, clip flags. 1591 * dword 4-7 (m3) is the 4D space position 1592 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if 1593 * enabled. 1594 * 1595 * m4 or 6 is the first vertex element data we fill, which is 1596 * the vertex position. 
1597 */ 1598 1599 current_annotation = "indices, point width, clip flags"; 1600 reg = brw_message_reg(header_mrf++); 1601 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); 1602 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1603 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), 1604 src_reg(output_reg[VERT_RESULT_PSIZ])); 1605 } 1606 1607 current_annotation = "gl_Position"; 1608 emit(BRW_OPCODE_MOV, 1609 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); 1610 1611 current_annotation = "user clip distances"; 1612 if (c->key.nr_userclip) { 1613 for (int i = 0; i < c->key.nr_userclip; i++) { 1614 struct brw_reg m; 1615 if (i < 4) 1616 m = brw_message_reg(header_mrf); 1617 else 1618 m = brw_message_reg(header_mrf + 1); 1619 1620 emit(BRW_OPCODE_DP4, 1621 dst_reg(brw_writemask(m, 1 << (i & 3))), 1622 src_reg(c->userplane[i])); 1623 } 1624 header_mrf += 2; 1625 } 1626 1627 current_annotation = NULL; 1628 1629 return header_mrf; 1630} 1631 1632static int 1633align_interleaved_urb_mlen(struct brw_context *brw, int mlen) 1634{ 1635 struct intel_context *intel = &brw->intel; 1636 1637 if (intel->gen >= 6) { 1638 /* URB data written (does not include the message header reg) must 1639 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, 1640 * section 5.4.3.2.2: URB_INTERLEAVED. 1641 * 1642 * URB entries are allocated on a multiple of 1024 bits, so an 1643 * extra 128 bits written here to make the end align to 256 is 1644 * no problem. 1645 */ 1646 if ((mlen % 2) != 1) 1647 mlen++; 1648 } 1649 1650 return mlen; 1651} 1652 1653/** 1654 * Generates the VUE payload plus the 1 or 2 URB write instructions to 1655 * complete the VS thread. 1656 * 1657 * The VUE layout is documented in Volume 2a. 
1658 */ 1659void 1660vec4_visitor::emit_urb_writes() 1661{ 1662 int base_mrf = 1; 1663 int mrf = base_mrf; 1664 int urb_entry_size; 1665 1666 /* FINISHME: edgeflag */ 1667 1668 /* First mrf is the g0-based message header containing URB handles and such, 1669 * which is implied in VS_OPCODE_URB_WRITE. 1670 */ 1671 mrf++; 1672 1673 if (intel->gen >= 6) { 1674 mrf = emit_vue_header_gen6(mrf); 1675 } else { 1676 mrf = emit_vue_header_gen4(mrf); 1677 } 1678 1679 int attr; 1680 for (attr = 0; attr < VERT_RESULT_MAX; attr++) { 1681 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1682 continue; 1683 1684 /* This is loaded into the VUE header, and thus doesn't occupy 1685 * an attribute slot. 1686 */ 1687 if (attr == VERT_RESULT_PSIZ) 1688 continue; 1689 1690 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); 1691 1692 /* If this is MRF 15, we can't fit anything more into this URB 1693 * WRITE. Note that base_mrf of 1 means that MRF 15 is an 1694 * even-numbered amount of URB write data, which will meet 1695 * gen6's requirements for length alignment. 
1696 */ 1697 if (mrf == 15) 1698 break; 1699 } 1700 1701 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); 1702 inst->base_mrf = base_mrf; 1703 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); 1704 inst->eot = true; 1705 1706 urb_entry_size = mrf - base_mrf; 1707 1708 for (; attr < VERT_RESULT_MAX; attr++) { 1709 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1710 continue; 1711 fail("Second URB write not supported.\n"); 1712 break; 1713 } 1714 1715 if (intel->gen == 6) 1716 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; 1717 else 1718 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; 1719} 1720 1721vec4_visitor::vec4_visitor(struct brw_vs_compile *c, 1722 struct gl_shader_program *prog, 1723 struct brw_shader *shader) 1724{ 1725 this->c = c; 1726 this->p = &c->func; 1727 this->brw = p->brw; 1728 this->intel = &brw->intel; 1729 this->ctx = &intel->ctx; 1730 this->prog = prog; 1731 this->shader = shader; 1732 1733 this->mem_ctx = ralloc_context(NULL); 1734 this->failed = false; 1735 1736 this->base_ir = NULL; 1737 this->current_annotation = NULL; 1738 1739 this->c = c; 1740 this->vp = brw->vertex_program; /* FINISHME: change for precompile */ 1741 this->prog_data = &c->prog_data; 1742 1743 this->variable_ht = hash_table_ctor(0, 1744 hash_table_pointer_hash, 1745 hash_table_pointer_compare); 1746 1747 this->virtual_grf_sizes = NULL; 1748 this->virtual_grf_count = 0; 1749 this->virtual_grf_array_size = 0; 1750 1751 this->uniforms = 0; 1752 1753 this->variable_ht = hash_table_ctor(0, 1754 hash_table_pointer_hash, 1755 hash_table_pointer_compare); 1756} 1757 1758vec4_visitor::~vec4_visitor() 1759{ 1760 hash_table_dtor(this->variable_ht); 1761} 1762 1763 1764void 1765vec4_visitor::fail(const char *format, ...) 
1766{ 1767 va_list va; 1768 char *msg; 1769 1770 if (failed) 1771 return; 1772 1773 failed = true; 1774 1775 va_start(va, format); 1776 msg = ralloc_vasprintf(mem_ctx, format, va); 1777 va_end(va); 1778 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); 1779 1780 this->fail_msg = msg; 1781 1782 if (INTEL_DEBUG & DEBUG_VS) { 1783 fprintf(stderr, "%s", msg); 1784 } 1785} 1786 1787} /* namespace brw */ 1788