brw_vec4_visitor.cpp revision 78fac1892a3a7a90eb7baf78903d70649028d27a
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22 */ 23 24#include "brw_vec4.h" 25extern "C" { 26#include "main/macros.h" 27#include "program/prog_parameter.h" 28} 29 30namespace brw { 31 32src_reg::src_reg(dst_reg reg) 33{ 34 init(); 35 36 this->file = reg.file; 37 this->reg = reg.reg; 38 this->reg_offset = reg.reg_offset; 39 this->type = reg.type; 40 41 int swizzles[4]; 42 int next_chan = 0; 43 int last = 0; 44 45 for (int i = 0; i < 4; i++) { 46 if (!(reg.writemask & (1 << i))) 47 continue; 48 49 swizzles[next_chan++] = last = i; 50 } 51 52 for (; next_chan < 4; next_chan++) { 53 swizzles[next_chan] = last; 54 } 55 56 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 57 swizzles[2], swizzles[3]); 58} 59 60dst_reg::dst_reg(src_reg reg) 61{ 62 init(); 63 64 this->file = reg.file; 65 this->reg = reg.reg; 66 this->reg_offset = reg.reg_offset; 67 this->type = reg.type; 68 this->writemask = WRITEMASK_XYZW; 69} 70 71vec4_instruction * 72vec4_visitor::emit(enum opcode opcode, dst_reg dst, 73 src_reg src0, src_reg src1, src_reg src2) 74{ 75 vec4_instruction *inst = new(mem_ctx) vec4_instruction(); 76 77 inst->opcode = opcode; 78 inst->dst = dst; 79 inst->src[0] = src0; 80 inst->src[1] = src1; 81 inst->src[2] = src2; 82 inst->ir = this->base_ir; 83 inst->annotation = this->current_annotation; 84 85 this->instructions.push_tail(inst); 86 87 return inst; 88} 89 90 91vec4_instruction * 92vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) 93{ 94 return emit(opcode, dst, src0, src1, src_reg()); 95} 96 97vec4_instruction * 98vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) 99{ 100 assert(dst.writemask != 0); 101 return emit(opcode, dst, src0, src_reg(), src_reg()); 102} 103 104vec4_instruction * 105vec4_visitor::emit(enum opcode opcode) 106{ 107 return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); 108} 109 110void 111vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) 112{ 113 static enum opcode dot_opcodes[] = { 114 BRW_OPCODE_DP2, 
BRW_OPCODE_DP3, BRW_OPCODE_DP4 115 }; 116 117 emit(dot_opcodes[elements - 2], dst, src0, src1); 118} 119 120void 121vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) 122{ 123 /* The gen6 math instruction ignores the source modifiers -- 124 * swizzle, abs, negate, and at least some parts of the register 125 * region description. Move the source to the corresponding slots 126 * of the destination generally work. 127 */ 128 src_reg expanded = src_reg(this, glsl_type::float_type); 129 emit(BRW_OPCODE_MOV, dst, src); 130 src = expanded; 131 132 emit(opcode, dst, src); 133} 134 135void 136vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) 137{ 138 vec4_instruction *inst = emit(opcode, dst, src); 139 inst->base_mrf = 1; 140 inst->mlen = 1; 141} 142 143void 144vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) 145{ 146 switch (opcode) { 147 case SHADER_OPCODE_RCP: 148 case SHADER_OPCODE_RSQ: 149 case SHADER_OPCODE_SQRT: 150 case SHADER_OPCODE_EXP2: 151 case SHADER_OPCODE_LOG2: 152 case SHADER_OPCODE_SIN: 153 case SHADER_OPCODE_COS: 154 break; 155 default: 156 assert(!"not reached: bad math opcode"); 157 return; 158 } 159 160 if (intel->gen >= 6) { 161 return emit_math1_gen6(opcode, dst, src); 162 } else { 163 return emit_math1_gen4(opcode, dst, src); 164 } 165} 166 167void 168vec4_visitor::emit_math2_gen6(enum opcode opcode, 169 dst_reg dst, src_reg src0, src_reg src1) 170{ 171 src_reg expanded; 172 173 /* The gen6 math instruction ignores the source modifiers -- 174 * swizzle, abs, negate, and at least some parts of the register 175 * region description. Move the sources to temporaries to make it 176 * generally work. 
177 */ 178 179 expanded = src_reg(this, glsl_type::vec4_type); 180 emit(BRW_OPCODE_MOV, dst, src0); 181 src0 = expanded; 182 183 expanded = src_reg(this, glsl_type::vec4_type); 184 emit(BRW_OPCODE_MOV, dst, src1); 185 src1 = expanded; 186 187 emit(opcode, dst, src0, src1); 188} 189 190void 191vec4_visitor::emit_math2_gen4(enum opcode opcode, 192 dst_reg dst, src_reg src0, src_reg src1) 193{ 194 vec4_instruction *inst = emit(opcode, dst, src0, src1); 195 inst->base_mrf = 1; 196 inst->mlen = 2; 197} 198 199void 200vec4_visitor::emit_math(enum opcode opcode, 201 dst_reg dst, src_reg src0, src_reg src1) 202{ 203 assert(opcode == SHADER_OPCODE_POW); 204 205 if (intel->gen >= 6) { 206 return emit_math2_gen6(opcode, dst, src0, src1); 207 } else { 208 return emit_math2_gen4(opcode, dst, src0, src1); 209 } 210} 211 212void 213vec4_visitor::visit_instructions(const exec_list *list) 214{ 215 foreach_iter(exec_list_iterator, iter, *list) { 216 ir_instruction *ir = (ir_instruction *)iter.get(); 217 218 base_ir = ir; 219 ir->accept(this); 220 } 221} 222 223 224static int 225type_size(const struct glsl_type *type) 226{ 227 unsigned int i; 228 int size; 229 230 switch (type->base_type) { 231 case GLSL_TYPE_UINT: 232 case GLSL_TYPE_INT: 233 case GLSL_TYPE_FLOAT: 234 case GLSL_TYPE_BOOL: 235 if (type->is_matrix()) { 236 return type->matrix_columns; 237 } else { 238 /* Regardless of size of vector, it gets a vec4. This is bad 239 * packing for things like floats, but otherwise arrays become a 240 * mess. Hopefully a later pass over the code can pack scalars 241 * down if appropriate. 
242 */ 243 return 1; 244 } 245 case GLSL_TYPE_ARRAY: 246 assert(type->length > 0); 247 return type_size(type->fields.array) * type->length; 248 case GLSL_TYPE_STRUCT: 249 size = 0; 250 for (i = 0; i < type->length; i++) { 251 size += type_size(type->fields.structure[i].type); 252 } 253 return size; 254 case GLSL_TYPE_SAMPLER: 255 /* Samplers take up one slot in UNIFORMS[], but they're baked in 256 * at link time. 257 */ 258 return 1; 259 default: 260 assert(0); 261 return 0; 262 } 263} 264 265int 266vec4_visitor::virtual_grf_alloc(int size) 267{ 268 if (virtual_grf_array_size <= virtual_grf_count) { 269 if (virtual_grf_array_size == 0) 270 virtual_grf_array_size = 16; 271 else 272 virtual_grf_array_size *= 2; 273 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, 274 virtual_grf_array_size); 275 } 276 virtual_grf_sizes[virtual_grf_count] = size; 277 return virtual_grf_count++; 278} 279 280src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) 281{ 282 init(); 283 284 this->file = GRF; 285 this->reg = v->virtual_grf_alloc(type_size(type)); 286 287 if (type->is_array() || type->is_record()) { 288 this->swizzle = BRW_SWIZZLE_NOOP; 289 } else { 290 this->swizzle = swizzle_for_size(type->vector_elements); 291 } 292 293 this->type = brw_type_for_base_type(type); 294} 295 296dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) 297{ 298 init(); 299 300 this->file = GRF; 301 this->reg = v->virtual_grf_alloc(type_size(type)); 302 303 if (type->is_array() || type->is_record()) { 304 this->writemask = WRITEMASK_XYZW; 305 } else { 306 this->writemask = (1 << type->vector_elements) - 1; 307 } 308 309 this->type = brw_type_for_base_type(type); 310} 311 312/* Our support for uniforms is piggy-backed on the struct 313 * gl_fragment_program, because that's where the values actually 314 * get stored, rather than in some global gl_shader_program uniform 315 * store. 
316 */ 317int 318vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) 319{ 320 unsigned int offset = 0; 321 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; 322 323 if (type->is_matrix()) { 324 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 325 type->vector_elements, 326 1); 327 328 for (unsigned int i = 0; i < type->matrix_columns; i++) { 329 offset += setup_uniform_values(loc + offset, column); 330 } 331 332 return offset; 333 } 334 335 switch (type->base_type) { 336 case GLSL_TYPE_FLOAT: 337 case GLSL_TYPE_UINT: 338 case GLSL_TYPE_INT: 339 case GLSL_TYPE_BOOL: 340 for (unsigned int i = 0; i < type->vector_elements; i++) { 341 int slot = this->uniforms * 4 + i; 342 switch (type->base_type) { 343 case GLSL_TYPE_FLOAT: 344 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 345 break; 346 case GLSL_TYPE_UINT: 347 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; 348 break; 349 case GLSL_TYPE_INT: 350 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; 351 break; 352 case GLSL_TYPE_BOOL: 353 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; 354 break; 355 default: 356 assert(!"not reached"); 357 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 358 break; 359 } 360 c->prog_data.param[slot] = &values[i]; 361 } 362 363 for (unsigned int i = type->vector_elements; i < 4; i++) { 364 c->prog_data.param_convert[this->uniforms * 4 + i] = 365 PARAM_CONVERT_ZERO; 366 c->prog_data.param[this->uniforms * 4 + i] = NULL; 367 } 368 369 this->uniform_size[this->uniforms] = type->vector_elements; 370 this->uniforms++; 371 372 return 1; 373 374 case GLSL_TYPE_STRUCT: 375 for (unsigned int i = 0; i < type->length; i++) { 376 offset += setup_uniform_values(loc + offset, 377 type->fields.structure[i].type); 378 } 379 return offset; 380 381 case GLSL_TYPE_ARRAY: 382 for (unsigned int i = 0; i < type->length; i++) { 383 offset += setup_uniform_values(loc + offset, type->fields.array); 384 } 385 return offset; 386 387 
case GLSL_TYPE_SAMPLER: 388 /* The sampler takes up a slot, but we don't use any values from it. */ 389 return 1; 390 391 default: 392 assert(!"not reached"); 393 return 0; 394 } 395} 396 397/* Our support for builtin uniforms is even scarier than non-builtin. 398 * It sits on top of the PROG_STATE_VAR parameters that are 399 * automatically updated from GL context state. 400 */ 401void 402vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) 403{ 404 const ir_state_slot *const slots = ir->state_slots; 405 assert(ir->state_slots != NULL); 406 407 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 408 /* This state reference has already been setup by ir_to_mesa, 409 * but we'll get the same index back here. We can reference 410 * ParameterValues directly, since unlike brw_fs.cpp, we never 411 * add new state references during compile. 412 */ 413 int index = _mesa_add_state_reference(this->vp->Base.Parameters, 414 (gl_state_index *)slots[i].tokens); 415 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; 416 417 this->uniform_size[this->uniforms] = 0; 418 /* Add each of the unique swizzled channels of the element. 419 * This will end up matching the size of the glsl_type of this field. 
420 */ 421 int last_swiz = -1; 422 for (unsigned int j = 0; j < 4; j++) { 423 int swiz = GET_SWZ(slots[i].swizzle, j); 424 if (swiz == last_swiz) 425 break; 426 last_swiz = swiz; 427 428 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; 429 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; 430 this->uniform_size[this->uniforms]++; 431 } 432 this->uniforms++; 433 } 434} 435 436dst_reg * 437vec4_visitor::variable_storage(ir_variable *var) 438{ 439 return (dst_reg *)hash_table_find(this->variable_ht, var); 440} 441 442void 443vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 444{ 445 ir_expression *expr = ir->as_expression(); 446 447 if (expr) { 448 src_reg op[2]; 449 vec4_instruction *inst; 450 451 assert(expr->get_num_operands() <= 2); 452 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 453 assert(expr->operands[i]->type->is_scalar()); 454 455 expr->operands[i]->accept(this); 456 op[i] = this->result; 457 } 458 459 switch (expr->operation) { 460 case ir_unop_logic_not: 461 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); 462 inst->conditional_mod = BRW_CONDITIONAL_Z; 463 break; 464 465 case ir_binop_logic_xor: 466 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); 467 inst->conditional_mod = BRW_CONDITIONAL_NZ; 468 break; 469 470 case ir_binop_logic_or: 471 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); 472 inst->conditional_mod = BRW_CONDITIONAL_NZ; 473 break; 474 475 case ir_binop_logic_and: 476 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); 477 inst->conditional_mod = BRW_CONDITIONAL_NZ; 478 break; 479 480 case ir_unop_f2b: 481 if (intel->gen >= 6) { 482 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); 483 } else { 484 inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); 485 } 486 inst->conditional_mod = BRW_CONDITIONAL_NZ; 487 break; 488 489 case ir_unop_i2b: 490 if (intel->gen >= 6) { 491 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 492 } else { 
493 inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); 494 } 495 inst->conditional_mod = BRW_CONDITIONAL_NZ; 496 break; 497 498 case ir_binop_greater: 499 case ir_binop_gequal: 500 case ir_binop_less: 501 case ir_binop_lequal: 502 case ir_binop_equal: 503 case ir_binop_all_equal: 504 case ir_binop_nequal: 505 case ir_binop_any_nequal: 506 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 507 inst->conditional_mod = 508 brw_conditional_for_comparison(expr->operation); 509 break; 510 511 default: 512 assert(!"not reached"); 513 break; 514 } 515 return; 516 } 517 518 ir->accept(this); 519 520 if (intel->gen >= 6) { 521 vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), 522 this->result, src_reg(1)); 523 inst->conditional_mod = BRW_CONDITIONAL_NZ; 524 } else { 525 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); 526 inst->conditional_mod = BRW_CONDITIONAL_NZ; 527 } 528} 529 530/** 531 * Emit a gen6 IF statement with the comparison folded into the IF 532 * instruction. 
533 */ 534void 535vec4_visitor::emit_if_gen6(ir_if *ir) 536{ 537 ir_expression *expr = ir->condition->as_expression(); 538 539 if (expr) { 540 src_reg op[2]; 541 vec4_instruction *inst; 542 dst_reg temp; 543 544 assert(expr->get_num_operands() <= 2); 545 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 546 assert(expr->operands[i]->type->is_scalar()); 547 548 expr->operands[i]->accept(this); 549 op[i] = this->result; 550 } 551 552 switch (expr->operation) { 553 case ir_unop_logic_not: 554 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 555 inst->conditional_mod = BRW_CONDITIONAL_Z; 556 return; 557 558 case ir_binop_logic_xor: 559 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 560 inst->conditional_mod = BRW_CONDITIONAL_NZ; 561 return; 562 563 case ir_binop_logic_or: 564 temp = dst_reg(this, glsl_type::bool_type); 565 emit(BRW_OPCODE_OR, temp, op[0], op[1]); 566 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 567 inst->conditional_mod = BRW_CONDITIONAL_NZ; 568 return; 569 570 case ir_binop_logic_and: 571 temp = dst_reg(this, glsl_type::bool_type); 572 emit(BRW_OPCODE_AND, temp, op[0], op[1]); 573 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 574 inst->conditional_mod = BRW_CONDITIONAL_NZ; 575 return; 576 577 case ir_unop_f2b: 578 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); 579 inst->conditional_mod = BRW_CONDITIONAL_NZ; 580 return; 581 582 case ir_unop_i2b: 583 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 584 inst->conditional_mod = BRW_CONDITIONAL_NZ; 585 return; 586 587 case ir_binop_greater: 588 case ir_binop_gequal: 589 case ir_binop_less: 590 case ir_binop_lequal: 591 case ir_binop_equal: 592 case ir_binop_all_equal: 593 case ir_binop_nequal: 594 case ir_binop_any_nequal: 595 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 596 inst->conditional_mod = 597 brw_conditional_for_comparison(expr->operation); 598 return; 599 default: 600 assert(!"not 
reached"); 601 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 602 inst->conditional_mod = BRW_CONDITIONAL_NZ; 603 return; 604 } 605 return; 606 } 607 608 ir->condition->accept(this); 609 610 vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), 611 this->result, src_reg(0)); 612 inst->conditional_mod = BRW_CONDITIONAL_NZ; 613} 614 615void 616vec4_visitor::visit(ir_variable *ir) 617{ 618 dst_reg *reg = NULL; 619 620 if (variable_storage(ir)) 621 return; 622 623 switch (ir->mode) { 624 case ir_var_in: 625 reg = new(mem_ctx) dst_reg(ATTR, ir->location); 626 break; 627 628 case ir_var_out: 629 reg = new(mem_ctx) dst_reg(this, ir->type); 630 631 for (int i = 0; i < type_size(ir->type); i++) { 632 output_reg[ir->location + i] = *reg; 633 output_reg[ir->location + i].reg_offset = i; 634 } 635 break; 636 637 case ir_var_auto: 638 case ir_var_temporary: 639 reg = new(mem_ctx) dst_reg(this, ir->type); 640 break; 641 642 case ir_var_uniform: 643 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); 644 645 if (!strncmp(ir->name, "gl_", 3)) { 646 setup_builtin_uniform_values(ir); 647 } else { 648 setup_uniform_values(ir->location, ir->type); 649 } 650 break; 651 652 default: 653 assert(!"not reached"); 654 } 655 656 reg->type = brw_type_for_base_type(ir->type); 657 hash_table_insert(this->variable_ht, reg, ir); 658} 659 660void 661vec4_visitor::visit(ir_loop *ir) 662{ 663 ir_dereference_variable *counter = NULL; 664 665 /* We don't want debugging output to print the whole body of the 666 * loop as the annotation. 
667 */ 668 this->base_ir = NULL; 669 670 if (ir->counter != NULL) 671 counter = new(ir) ir_dereference_variable(ir->counter); 672 673 if (ir->from != NULL) { 674 assert(ir->counter != NULL); 675 676 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 677 678 a->accept(this); 679 delete a; 680 } 681 682 emit(BRW_OPCODE_DO); 683 684 if (ir->to) { 685 ir_expression *e = 686 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 687 counter, ir->to); 688 ir_if *if_stmt = new(ir) ir_if(e); 689 690 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 691 692 if_stmt->then_instructions.push_tail(brk); 693 694 if_stmt->accept(this); 695 696 delete if_stmt; 697 delete e; 698 delete brk; 699 } 700 701 visit_instructions(&ir->body_instructions); 702 703 if (ir->increment) { 704 ir_expression *e = 705 new(ir) ir_expression(ir_binop_add, counter->type, 706 counter, ir->increment); 707 708 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 709 710 a->accept(this); 711 delete a; 712 delete e; 713 } 714 715 emit(BRW_OPCODE_WHILE); 716} 717 718void 719vec4_visitor::visit(ir_loop_jump *ir) 720{ 721 switch (ir->mode) { 722 case ir_loop_jump::jump_break: 723 emit(BRW_OPCODE_BREAK); 724 break; 725 case ir_loop_jump::jump_continue: 726 emit(BRW_OPCODE_CONTINUE); 727 break; 728 } 729} 730 731 732void 733vec4_visitor::visit(ir_function_signature *ir) 734{ 735 assert(0); 736 (void)ir; 737} 738 739void 740vec4_visitor::visit(ir_function *ir) 741{ 742 /* Ignore function bodies other than main() -- we shouldn't see calls to 743 * them since they should all be inlined. 
744 */ 745 if (strcmp(ir->name, "main") == 0) { 746 const ir_function_signature *sig; 747 exec_list empty; 748 749 sig = ir->matching_signature(&empty); 750 751 assert(sig); 752 753 visit_instructions(&sig->body); 754 } 755} 756 757GLboolean 758vec4_visitor::try_emit_sat(ir_expression *ir) 759{ 760 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 761 if (!sat_src) 762 return false; 763 764 sat_src->accept(this); 765 src_reg src = this->result; 766 767 this->result = src_reg(this, ir->type); 768 vec4_instruction *inst; 769 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); 770 inst->saturate = true; 771 772 return true; 773} 774 775void 776vec4_visitor::emit_bool_comparison(unsigned int op, 777 dst_reg dst, src_reg src0, src_reg src1) 778{ 779 /* original gen4 does destination conversion before comparison. */ 780 if (intel->gen < 5) 781 dst.type = src0.type; 782 783 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); 784 inst->conditional_mod = brw_conditional_for_comparison(op); 785 786 dst.type = BRW_REGISTER_TYPE_D; 787 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); 788} 789 790void 791vec4_visitor::visit(ir_expression *ir) 792{ 793 unsigned int operand; 794 src_reg op[Elements(ir->operands)]; 795 src_reg result_src; 796 dst_reg result_dst; 797 vec4_instruction *inst; 798 799 if (try_emit_sat(ir)) 800 return; 801 802 for (operand = 0; operand < ir->get_num_operands(); operand++) { 803 this->result.file = BAD_FILE; 804 ir->operands[operand]->accept(this); 805 if (this->result.file == BAD_FILE) { 806 printf("Failed to get tree for expression operand:\n"); 807 ir->operands[operand]->print(); 808 exit(1); 809 } 810 op[operand] = this->result; 811 812 /* Matrix expression operands should have been broken down to vector 813 * operations already. 
814 */ 815 assert(!ir->operands[operand]->type->is_matrix()); 816 } 817 818 int vector_elements = ir->operands[0]->type->vector_elements; 819 if (ir->operands[1]) { 820 vector_elements = MAX2(vector_elements, 821 ir->operands[1]->type->vector_elements); 822 } 823 824 this->result.file = BAD_FILE; 825 826 /* Storage for our result. Ideally for an assignment we'd be using 827 * the actual storage for the result here, instead. 828 */ 829 result_src = src_reg(this, ir->type); 830 /* convenience for the emit functions below. */ 831 result_dst = dst_reg(result_src); 832 /* If nothing special happens, this is the result. */ 833 this->result = result_src; 834 /* Limit writes to the channels that will be used by result_src later. 835 * This does limit this temp's use as a temporary for multi-instruction 836 * sequences. 837 */ 838 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 839 840 switch (ir->operation) { 841 case ir_unop_logic_not: 842 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 843 * ones complement of the whole register, not just bit 0. 
844 */ 845 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); 846 break; 847 case ir_unop_neg: 848 op[0].negate = !op[0].negate; 849 this->result = op[0]; 850 break; 851 case ir_unop_abs: 852 op[0].abs = true; 853 op[0].negate = false; 854 this->result = op[0]; 855 break; 856 857 case ir_unop_sign: 858 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); 859 860 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 861 inst->conditional_mod = BRW_CONDITIONAL_G; 862 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); 863 inst->predicate = BRW_PREDICATE_NORMAL; 864 865 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 866 inst->conditional_mod = BRW_CONDITIONAL_L; 867 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); 868 inst->predicate = BRW_PREDICATE_NORMAL; 869 870 break; 871 872 case ir_unop_rcp: 873 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); 874 break; 875 876 case ir_unop_exp2: 877 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); 878 break; 879 case ir_unop_log2: 880 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); 881 break; 882 case ir_unop_exp: 883 case ir_unop_log: 884 assert(!"not reached: should be handled by ir_explog_to_explog2"); 885 break; 886 case ir_unop_sin: 887 case ir_unop_sin_reduced: 888 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); 889 break; 890 case ir_unop_cos: 891 case ir_unop_cos_reduced: 892 emit_math(SHADER_OPCODE_COS, result_dst, op[0]); 893 break; 894 895 case ir_unop_dFdx: 896 case ir_unop_dFdy: 897 assert(!"derivatives not valid in vertex shader"); 898 break; 899 900 case ir_unop_noise: 901 assert(!"not reached: should be handled by lower_noise"); 902 break; 903 904 case ir_binop_add: 905 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); 906 break; 907 case ir_binop_sub: 908 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 909 break; 910 911 case ir_binop_mul: 912 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); 913 break; 914 case ir_binop_div: 915 assert(!"not reached: 
should be handled by ir_div_to_mul_rcp"); 916 case ir_binop_mod: 917 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 918 break; 919 920 case ir_binop_less: 921 case ir_binop_greater: 922 case ir_binop_lequal: 923 case ir_binop_gequal: 924 case ir_binop_equal: 925 case ir_binop_nequal: { 926 dst_reg temp = result_dst; 927 /* original gen4 does implicit conversion before comparison. */ 928 if (intel->gen < 5) 929 temp.type = op[0].type; 930 931 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 932 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 933 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); 934 break; 935 } 936 937 case ir_binop_all_equal: 938 /* "==" operator producing a scalar boolean. */ 939 if (ir->operands[0]->type->is_vector() || 940 ir->operands[1]->type->is_vector()) { 941 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 942 inst->conditional_mod = BRW_CONDITIONAL_Z; 943 944 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 945 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 946 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 947 } else { 948 dst_reg temp = result_dst; 949 /* original gen4 does implicit conversion before comparison. */ 950 if (intel->gen < 5) 951 temp.type = op[0].type; 952 953 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 954 inst->conditional_mod = BRW_CONDITIONAL_NZ; 955 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 956 } 957 break; 958 case ir_binop_any_nequal: 959 /* "!=" operator producing a scalar boolean. 
*/ 960 if (ir->operands[0]->type->is_vector() || 961 ir->operands[1]->type->is_vector()) { 962 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 963 inst->conditional_mod = BRW_CONDITIONAL_NZ; 964 965 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 966 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 967 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 968 } else { 969 dst_reg temp = result_dst; 970 /* original gen4 does implicit conversion before comparison. */ 971 if (intel->gen < 5) 972 temp.type = op[0].type; 973 974 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 975 inst->conditional_mod = BRW_CONDITIONAL_NZ; 976 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 977 } 978 break; 979 980 case ir_unop_any: 981 emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 982 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 983 984 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 985 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 986 break; 987 988 case ir_binop_logic_xor: 989 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 990 break; 991 992 case ir_binop_logic_or: 993 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 994 break; 995 996 case ir_binop_logic_and: 997 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 998 break; 999 1000 case ir_binop_dot: 1001 assert(ir->operands[0]->type->is_vector()); 1002 assert(ir->operands[0]->type == ir->operands[1]->type); 1003 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); 1004 break; 1005 1006 case ir_unop_sqrt: 1007 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); 1008 break; 1009 case ir_unop_rsq: 1010 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); 1011 break; 1012 case ir_unop_i2f: 1013 case ir_unop_i2u: 1014 case ir_unop_u2i: 1015 case ir_unop_u2f: 1016 case ir_unop_b2f: 1017 case ir_unop_b2i: 1018 case ir_unop_f2i: 1019 emit(BRW_OPCODE_MOV, result_dst, op[0]); 1020 break; 1021 case ir_unop_f2b: 1022 case ir_unop_i2b: { 1023 dst_reg temp = result_dst; 1024 /* original gen4 does 
implicit conversion before comparison. */ 1025 if (intel->gen < 5) 1026 temp.type = op[0].type; 1027 1028 inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); 1029 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1030 inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); 1031 break; 1032 } 1033 1034 case ir_unop_trunc: 1035 emit(BRW_OPCODE_RNDZ, result_dst, op[0]); 1036 break; 1037 case ir_unop_ceil: 1038 op[0].negate = !op[0].negate; 1039 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1040 this->result.negate = true; 1041 break; 1042 case ir_unop_floor: 1043 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1044 break; 1045 case ir_unop_fract: 1046 inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); 1047 break; 1048 case ir_unop_round_even: 1049 emit(BRW_OPCODE_RNDE, result_dst, op[0]); 1050 break; 1051 1052 case ir_binop_min: 1053 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1054 inst->conditional_mod = BRW_CONDITIONAL_L; 1055 1056 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1057 inst->predicate = BRW_PREDICATE_NORMAL; 1058 break; 1059 case ir_binop_max: 1060 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1061 inst->conditional_mod = BRW_CONDITIONAL_G; 1062 1063 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1064 inst->predicate = BRW_PREDICATE_NORMAL; 1065 break; 1066 1067 case ir_binop_pow: 1068 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); 1069 break; 1070 1071 case ir_unop_bit_not: 1072 inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); 1073 break; 1074 case ir_binop_bit_and: 1075 inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1076 break; 1077 case ir_binop_bit_xor: 1078 inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1079 break; 1080 case ir_binop_bit_or: 1081 inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1082 break; 1083 1084 case ir_binop_lshift: 1085 case ir_binop_rshift: 1086 assert(!"GLSL 1.30 features unsupported"); 1087 break; 1088 1089 case ir_quadop_vector: 1090 assert(!"not 
reached: should be handled by lower_quadop_vector"); 1091 break; 1092 } 1093} 1094 1095 1096void 1097vec4_visitor::visit(ir_swizzle *ir) 1098{ 1099 src_reg src; 1100 int i = 0; 1101 int swizzle[4]; 1102 1103 /* Note that this is only swizzles in expressions, not those on the left 1104 * hand side of an assignment, which do write masking. See ir_assignment 1105 * for that. 1106 */ 1107 1108 ir->val->accept(this); 1109 src = this->result; 1110 assert(src.file != BAD_FILE); 1111 1112 if (i < ir->type->vector_elements) { 1113 switch (i) { 1114 case 0: 1115 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); 1116 break; 1117 case 1: 1118 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); 1119 break; 1120 case 2: 1121 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); 1122 break; 1123 case 3: 1124 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); 1125 break; 1126 } 1127 } 1128 for (; i < 4; i++) { 1129 /* Replicate the last channel out. */ 1130 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1131 } 1132 1133 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1134 1135 this->result = src; 1136} 1137 1138void 1139vec4_visitor::visit(ir_dereference_variable *ir) 1140{ 1141 dst_reg *reg = variable_storage(ir->var); 1142 1143 if (!reg) { 1144 fail("Failed to find variable storage for %s\n", ir->var->name); 1145 this->result = src_reg(brw_null_reg()); 1146 return; 1147 } 1148 1149 this->result = src_reg(*reg); 1150} 1151 1152void 1153vec4_visitor::visit(ir_dereference_array *ir) 1154{ 1155 ir_constant *constant_index; 1156 src_reg src; 1157 int element_size = type_size(ir->type); 1158 1159 constant_index = ir->array_index->constant_expression_value(); 1160 1161 ir->array->accept(this); 1162 src = this->result; 1163 1164 if (constant_index) { 1165 src.reg_offset += constant_index->value.i[0] * element_size; 1166 } else { 1167#if 0 /* Variable array index */ 1168 /* Variable index array dereference. 
It eats the "vec4" of the 1169 * base of the array and an index that offsets the Mesa register 1170 * index. 1171 */ 1172 ir->array_index->accept(this); 1173 1174 src_reg index_reg; 1175 1176 if (element_size == 1) { 1177 index_reg = this->result; 1178 } else { 1179 index_reg = src_reg(this, glsl_type::float_type); 1180 1181 emit(BRW_OPCODE_MUL, dst_reg(index_reg), 1182 this->result, src_reg_for_float(element_size)); 1183 } 1184 1185 src.reladdr = ralloc(mem_ctx, src_reg); 1186 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1187#endif 1188 } 1189 1190 /* If the type is smaller than a vec4, replicate the last channel out. */ 1191 if (ir->type->is_scalar() || ir->type->is_vector()) 1192 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1193 else 1194 src.swizzle = BRW_SWIZZLE_NOOP; 1195 1196 this->result = src; 1197} 1198 1199void 1200vec4_visitor::visit(ir_dereference_record *ir) 1201{ 1202 unsigned int i; 1203 const glsl_type *struct_type = ir->record->type; 1204 int offset = 0; 1205 1206 ir->record->accept(this); 1207 1208 for (i = 0; i < struct_type->length; i++) { 1209 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 1210 break; 1211 offset += type_size(struct_type->fields.structure[i].type); 1212 } 1213 1214 /* If the type is smaller than a vec4, replicate the last channel out. */ 1215 if (ir->type->is_scalar() || ir->type->is_vector()) 1216 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1217 else 1218 this->result.swizzle = BRW_SWIZZLE_NOOP; 1219 1220 this->result.reg_offset += offset; 1221} 1222 1223/** 1224 * We want to be careful in assignment setup to hit the actual storage 1225 * instead of potentially using a temporary like we might with the 1226 * ir_dereference handler. 1227 */ 1228static dst_reg 1229get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) 1230{ 1231 /* The LHS must be a dereference. 
If the LHS is a variable indexed array 1232 * access of a vector, it must be separated into a series conditional moves 1233 * before reaching this point (see ir_vec_index_to_cond_assign). 1234 */ 1235 assert(ir->as_dereference()); 1236 ir_dereference_array *deref_array = ir->as_dereference_array(); 1237 if (deref_array) { 1238 assert(!deref_array->array->type->is_vector()); 1239 } 1240 1241 /* Use the rvalue deref handler for the most part. We'll ignore 1242 * swizzles in it and write swizzles using writemask, though. 1243 */ 1244 ir->accept(v); 1245 return dst_reg(v->result); 1246} 1247 1248void 1249vec4_visitor::emit_block_move(ir_assignment *ir) 1250{ 1251 ir->rhs->accept(this); 1252 src_reg src = this->result; 1253 1254 dst_reg dst = get_assignment_lhs(ir->lhs, this); 1255 1256 /* FINISHME: This should really set to the correct maximal writemask for each 1257 * FINISHME: component written (in the loops below). 1258 */ 1259 dst.writemask = WRITEMASK_XYZW; 1260 1261 for (int i = 0; i < type_size(ir->lhs->type); i++) { 1262 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); 1263 if (ir->condition) 1264 inst->predicate = BRW_PREDICATE_NORMAL; 1265 1266 dst.reg_offset++; 1267 src.reg_offset++; 1268 } 1269} 1270 1271void 1272vec4_visitor::visit(ir_assignment *ir) 1273{ 1274 if (!ir->lhs->type->is_scalar() && 1275 !ir->lhs->type->is_vector()) { 1276 emit_block_move(ir); 1277 return; 1278 } 1279 1280 /* Now we're down to just a scalar/vector with writemasks. 
*/ 1281 int i; 1282 1283 ir->rhs->accept(this); 1284 src_reg src = this->result; 1285 1286 dst_reg dst = get_assignment_lhs(ir->lhs, this); 1287 1288 int swizzles[4]; 1289 int first_enabled_chan = 0; 1290 int src_chan = 0; 1291 1292 assert(ir->lhs->type->is_vector() || 1293 ir->lhs->type->is_scalar()); 1294 dst.writemask = ir->write_mask; 1295 1296 for (int i = 0; i < 4; i++) { 1297 if (dst.writemask & (1 << i)) { 1298 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); 1299 break; 1300 } 1301 } 1302 1303 /* Swizzle a small RHS vector into the channels being written. 1304 * 1305 * glsl ir treats write_mask as dictating how many channels are 1306 * present on the RHS while in our instructions we need to make 1307 * those channels appear in the slots of the vec4 they're written to. 1308 */ 1309 for (int i = 0; i < 4; i++) { 1310 if (dst.writemask & (1 << i)) 1311 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); 1312 else 1313 swizzles[i] = first_enabled_chan; 1314 } 1315 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 1316 swizzles[2], swizzles[3]); 1317 1318 if (ir->condition) { 1319 emit_bool_to_cond_code(ir->condition); 1320 } 1321 1322 for (i = 0; i < type_size(ir->lhs->type); i++) { 1323 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); 1324 1325 if (ir->condition) 1326 inst->predicate = BRW_PREDICATE_NORMAL; 1327 1328 dst.reg_offset++; 1329 src.reg_offset++; 1330 } 1331} 1332 1333 1334void 1335vec4_visitor::visit(ir_constant *ir) 1336{ 1337 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 1338 src_reg temp_base = src_reg(this, ir->type); 1339 dst_reg temp = dst_reg(temp_base); 1340 1341 foreach_iter(exec_list_iterator, iter, ir->components) { 1342 ir_constant *field_value = (ir_constant *)iter.get(); 1343 int size = type_size(field_value->type); 1344 1345 assert(size > 0); 1346 1347 field_value->accept(this); 1348 src_reg src = this->result; 1349 1350 for (int i = 0; i < (unsigned int)size; i++) { 1351 emit(BRW_OPCODE_MOV, temp, src); 1352 1353 
src.reg_offset++; 1354 temp.reg_offset++; 1355 } 1356 } 1357 this->result = temp_base; 1358 return; 1359 } 1360 1361 if (ir->type->is_array()) { 1362 src_reg temp_base = src_reg(this, ir->type); 1363 dst_reg temp = dst_reg(temp_base); 1364 int size = type_size(ir->type->fields.array); 1365 1366 assert(size > 0); 1367 1368 for (unsigned int i = 0; i < ir->type->length; i++) { 1369 ir->array_elements[i]->accept(this); 1370 src_reg src = this->result; 1371 for (int j = 0; j < size; j++) { 1372 emit(BRW_OPCODE_MOV, temp, src); 1373 1374 src.reg_offset++; 1375 temp.reg_offset++; 1376 } 1377 } 1378 this->result = temp_base; 1379 return; 1380 } 1381 1382 if (ir->type->is_matrix()) { 1383 this->result = src_reg(this, ir->type); 1384 dst_reg dst = dst_reg(this->result); 1385 1386 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 1387 1388 for (int i = 0; i < ir->type->matrix_columns; i++) { 1389 for (int j = 0; j < ir->type->vector_elements; j++) { 1390 dst.writemask = 1 << j; 1391 emit(BRW_OPCODE_MOV, dst, 1392 src_reg(ir->value.f[i * ir->type->vector_elements + j])); 1393 } 1394 dst.reg_offset++; 1395 } 1396 return; 1397 } 1398 1399 this->result = src_reg(this, ir->type); 1400 dst_reg dst = dst_reg(this->result); 1401 1402 for (int i = 0; i < ir->type->vector_elements; i++) { 1403 dst.writemask = 1 << i; 1404 1405 switch (ir->type->base_type) { 1406 case GLSL_TYPE_FLOAT: 1407 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i])); 1408 break; 1409 case GLSL_TYPE_INT: 1410 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i])); 1411 break; 1412 case GLSL_TYPE_UINT: 1413 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i])); 1414 break; 1415 case GLSL_TYPE_BOOL: 1416 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i])); 1417 break; 1418 default: 1419 assert(!"Non-float/uint/int/bool constant"); 1420 break; 1421 } 1422 } 1423} 1424 1425void 1426vec4_visitor::visit(ir_call *ir) 1427{ 1428 assert(!"not reached"); 1429} 1430 1431void 1432vec4_visitor::visit(ir_texture *ir) 1433{ 1434 
assert(!"not reached"); 1435} 1436 1437void 1438vec4_visitor::visit(ir_return *ir) 1439{ 1440 assert(!"not reached"); 1441} 1442 1443void 1444vec4_visitor::visit(ir_discard *ir) 1445{ 1446 assert(!"not reached"); 1447} 1448 1449void 1450vec4_visitor::visit(ir_if *ir) 1451{ 1452 this->base_ir = ir->condition; 1453 ir->condition->accept(this); 1454 assert(this->result.file != BAD_FILE); 1455 1456 /* FINISHME: condcode */ 1457 emit(BRW_OPCODE_IF); 1458 1459 visit_instructions(&ir->then_instructions); 1460 1461 if (!ir->else_instructions.is_empty()) { 1462 this->base_ir = ir->condition; 1463 emit(BRW_OPCODE_ELSE); 1464 1465 visit_instructions(&ir->else_instructions); 1466 } 1467 1468 this->base_ir = ir->condition; 1469 emit(BRW_OPCODE_ENDIF); 1470} 1471 1472int 1473vec4_visitor::emit_vue_header_gen4(int header_mrf) 1474{ 1475 /* Get the position */ 1476 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); 1477 1478 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ 1479 dst_reg ndc = dst_reg(this, glsl_type::vec4_type); 1480 1481 current_annotation = "NDC"; 1482 dst_reg ndc_w = ndc; 1483 ndc_w.writemask = WRITEMASK_W; 1484 src_reg pos_w = pos; 1485 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); 1486 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); 1487 1488 dst_reg ndc_xyz = ndc; 1489 ndc_xyz.writemask = WRITEMASK_XYZ; 1490 1491 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); 1492 1493 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || 1494 c->key.nr_userclip || brw->has_negative_rhw_bug) { 1495 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); 1496 GLuint i; 1497 1498 emit(BRW_OPCODE_MOV, header1, 0u); 1499 1500 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1501 assert(!"finishme: psiz"); 1502 src_reg psiz; 1503 1504 header1.writemask = WRITEMASK_W; 1505 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); 1506 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); 1507 } 1508 1509 for 
(i = 0; i < c->key.nr_userclip; i++) { 1510 vec4_instruction *inst; 1511 1512 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), 1513 pos, src_reg(c->userplane[i])); 1514 inst->conditional_mod = BRW_CONDITIONAL_L; 1515 1516 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); 1517 inst->predicate = BRW_PREDICATE_NORMAL; 1518 } 1519 1520 /* i965 clipping workaround: 1521 * 1) Test for -ve rhw 1522 * 2) If set, 1523 * set ndc = (0,0,0,0) 1524 * set ucp[6] = 1 1525 * 1526 * Later, clipping will detect ucp[6] and ensure the primitive is 1527 * clipped against all fixed planes. 1528 */ 1529 if (brw->has_negative_rhw_bug) { 1530#if 0 1531 /* FINISHME */ 1532 brw_CMP(p, 1533 vec8(brw_null_reg()), 1534 BRW_CONDITIONAL_L, 1535 brw_swizzle1(ndc, 3), 1536 brw_imm_f(0)); 1537 1538 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); 1539 brw_MOV(p, ndc, brw_imm_f(0)); 1540 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1541#endif 1542 } 1543 1544 header1.writemask = WRITEMASK_XYZW; 1545 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); 1546 } else { 1547 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), 1548 BRW_REGISTER_TYPE_UD), 0u); 1549 } 1550 1551 if (intel->gen == 5) { 1552 /* There are 20 DWs (D0-D19) in VUE header on Ironlake: 1553 * dword 0-3 (m1) of the header is indices, point width, clip flags. 1554 * dword 4-7 (m2) is the ndc position (set above) 1555 * dword 8-11 (m3) of the vertex header is the 4D space position 1556 * dword 12-19 (m4,m5) of the vertex header is the user clip distance. 1557 * m6 is a pad so that the vertex element data is aligned 1558 * m7 is the first vertex data we fill. 1559 */ 1560 current_annotation = "NDC"; 1561 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1562 1563 current_annotation = "gl_Position"; 1564 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1565 1566 /* user clip distance. 
*/ 1567 header_mrf += 2; 1568 1569 /* Pad so that vertex element data is aligned. */ 1570 header_mrf++; 1571 } else { 1572 /* There are 8 dwords in VUE header pre-Ironlake: 1573 * dword 0-3 (m1) is indices, point width, clip flags. 1574 * dword 4-7 (m2) is ndc position (set above) 1575 * 1576 * dword 8-11 (m3) is the first vertex data. 1577 */ 1578 current_annotation = "NDC"; 1579 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1580 1581 current_annotation = "gl_Position"; 1582 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1583 } 1584 1585 return header_mrf; 1586} 1587 1588int 1589vec4_visitor::emit_vue_header_gen6(int header_mrf) 1590{ 1591 struct brw_reg reg; 1592 1593 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: 1594 * dword 0-3 (m2) of the header is indices, point width, clip flags. 1595 * dword 4-7 (m3) is the 4D space position 1596 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if 1597 * enabled. 1598 * 1599 * m4 or 6 is the first vertex element data we fill. 
1600 */ 1601 1602 current_annotation = "indices, point width, clip flags"; 1603 reg = brw_message_reg(header_mrf++); 1604 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); 1605 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1606 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), 1607 src_reg(output_reg[VERT_RESULT_PSIZ])); 1608 } 1609 1610 current_annotation = "gl_Position"; 1611 emit(BRW_OPCODE_MOV, 1612 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); 1613 1614 current_annotation = "user clip distances"; 1615 if (c->key.nr_userclip) { 1616 for (int i = 0; i < c->key.nr_userclip; i++) { 1617 struct brw_reg m; 1618 if (i < 4) 1619 m = brw_message_reg(header_mrf); 1620 else 1621 m = brw_message_reg(header_mrf + 1); 1622 1623 emit(BRW_OPCODE_DP4, 1624 dst_reg(brw_writemask(m, 1 << (i & 3))), 1625 src_reg(c->userplane[i])); 1626 } 1627 header_mrf += 2; 1628 } 1629 1630 current_annotation = NULL; 1631 1632 return header_mrf; 1633} 1634 1635static int 1636align_interleaved_urb_mlen(struct brw_context *brw, int mlen) 1637{ 1638 struct intel_context *intel = &brw->intel; 1639 1640 if (intel->gen >= 6) { 1641 /* URB data written (does not include the message header reg) must 1642 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, 1643 * section 5.4.3.2.2: URB_INTERLEAVED. 1644 * 1645 * URB entries are allocated on a multiple of 1024 bits, so an 1646 * extra 128 bits written here to make the end align to 256 is 1647 * no problem. 1648 */ 1649 if ((mlen % 2) != 1) 1650 mlen++; 1651 } 1652 1653 return mlen; 1654} 1655 1656/** 1657 * Generates the VUE payload plus the 1 or 2 URB write instructions to 1658 * complete the VS thread. 1659 * 1660 * The VUE layout is documented in Volume 2a. 
1661 */ 1662void 1663vec4_visitor::emit_urb_writes() 1664{ 1665 int base_mrf = 1; 1666 int mrf = base_mrf; 1667 int urb_entry_size; 1668 1669 /* FINISHME: edgeflag */ 1670 1671 /* First mrf is the g0-based message header containing URB handles and such, 1672 * which is implied in VS_OPCODE_URB_WRITE. 1673 */ 1674 mrf++; 1675 1676 if (intel->gen >= 6) { 1677 mrf = emit_vue_header_gen6(mrf); 1678 } else { 1679 mrf = emit_vue_header_gen4(mrf); 1680 } 1681 1682 int attr; 1683 for (attr = 0; attr < VERT_RESULT_MAX; attr++) { 1684 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1685 continue; 1686 1687 /* This is set up in the VUE header. */ 1688 if (attr == VERT_RESULT_HPOS) 1689 continue; 1690 1691 /* This is loaded into the VUE header, and thus doesn't occupy 1692 * an attribute slot. 1693 */ 1694 if (attr == VERT_RESULT_PSIZ) 1695 continue; 1696 1697 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); 1698 1699 /* If this is MRF 15, we can't fit anything more into this URB 1700 * WRITE. Note that base_mrf of 1 means that MRF 15 is an 1701 * even-numbered amount of URB write data, which will meet 1702 * gen6's requirements for length alignment. 
1703 */ 1704 if (mrf == 15) 1705 break; 1706 } 1707 1708 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); 1709 inst->base_mrf = base_mrf; 1710 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); 1711 inst->eot = true; 1712 1713 urb_entry_size = mrf - base_mrf; 1714 1715 for (; attr < VERT_RESULT_MAX; attr++) { 1716 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1717 continue; 1718 fail("Second URB write not supported.\n"); 1719 break; 1720 } 1721 1722 if (intel->gen == 6) 1723 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; 1724 else 1725 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; 1726} 1727 1728vec4_visitor::vec4_visitor(struct brw_vs_compile *c, 1729 struct gl_shader_program *prog, 1730 struct brw_shader *shader) 1731{ 1732 this->c = c; 1733 this->p = &c->func; 1734 this->brw = p->brw; 1735 this->intel = &brw->intel; 1736 this->ctx = &intel->ctx; 1737 this->prog = prog; 1738 this->shader = shader; 1739 1740 this->mem_ctx = ralloc_context(NULL); 1741 this->failed = false; 1742 1743 this->base_ir = NULL; 1744 this->current_annotation = NULL; 1745 1746 this->c = c; 1747 this->vp = brw->vertex_program; /* FINISHME: change for precompile */ 1748 this->prog_data = &c->prog_data; 1749 1750 this->variable_ht = hash_table_ctor(0, 1751 hash_table_pointer_hash, 1752 hash_table_pointer_compare); 1753 1754 this->virtual_grf_sizes = NULL; 1755 this->virtual_grf_count = 0; 1756 this->virtual_grf_array_size = 0; 1757 1758 this->uniforms = 0; 1759 1760 this->variable_ht = hash_table_ctor(0, 1761 hash_table_pointer_hash, 1762 hash_table_pointer_compare); 1763} 1764 1765vec4_visitor::~vec4_visitor() 1766{ 1767 hash_table_dtor(this->variable_ht); 1768} 1769 1770 1771void 1772vec4_visitor::fail(const char *format, ...) 
1773{ 1774 va_list va; 1775 char *msg; 1776 1777 if (failed) 1778 return; 1779 1780 failed = true; 1781 1782 va_start(va, format); 1783 msg = ralloc_vasprintf(mem_ctx, format, va); 1784 va_end(va); 1785 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); 1786 1787 this->fail_msg = msg; 1788 1789 if (INTEL_DEBUG & DEBUG_VS) { 1790 fprintf(stderr, "%s", msg); 1791 } 1792} 1793 1794} /* namespace brw */ 1795