/* brw_vec4_visitor.cpp — revision aa753c5a14637ede804e8043762693122174bf8c */
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22 */ 23 24#include "brw_vec4.h" 25extern "C" { 26#include "main/macros.h" 27#include "program/prog_parameter.h" 28} 29 30namespace brw { 31 32src_reg::src_reg(dst_reg reg) 33{ 34 init(); 35 36 this->file = reg.file; 37 this->reg = reg.reg; 38 this->reg_offset = reg.reg_offset; 39 this->type = reg.type; 40 41 int swizzles[4]; 42 int next_chan = 0; 43 int last = 0; 44 45 for (int i = 0; i < 4; i++) { 46 if (!(reg.writemask & (1 << i))) 47 continue; 48 49 swizzles[next_chan++] = last = i; 50 } 51 52 for (; next_chan < 4; next_chan++) { 53 swizzles[next_chan] = last; 54 } 55 56 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 57 swizzles[2], swizzles[3]); 58} 59 60dst_reg::dst_reg(src_reg reg) 61{ 62 init(); 63 64 this->file = reg.file; 65 this->reg = reg.reg; 66 this->reg_offset = reg.reg_offset; 67 this->type = reg.type; 68 this->writemask = WRITEMASK_XYZW; 69} 70 71vec4_instruction * 72vec4_visitor::emit(enum opcode opcode, dst_reg dst, 73 src_reg src0, src_reg src1, src_reg src2) 74{ 75 vec4_instruction *inst = new(mem_ctx) vec4_instruction(); 76 77 inst->opcode = opcode; 78 inst->dst = dst; 79 inst->src[0] = src0; 80 inst->src[1] = src1; 81 inst->src[2] = src2; 82 inst->ir = this->base_ir; 83 inst->annotation = this->current_annotation; 84 85 this->instructions.push_tail(inst); 86 87 return inst; 88} 89 90 91vec4_instruction * 92vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) 93{ 94 return emit(opcode, dst, src0, src1, src_reg()); 95} 96 97vec4_instruction * 98vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) 99{ 100 assert(dst.writemask != 0); 101 return emit(opcode, dst, src0, src_reg(), src_reg()); 102} 103 104vec4_instruction * 105vec4_visitor::emit(enum opcode opcode) 106{ 107 return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); 108} 109 110void 111vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) 112{ 113 static enum opcode dot_opcodes[] = { 114 BRW_OPCODE_DP2, 
BRW_OPCODE_DP3, BRW_OPCODE_DP4 115 }; 116 117 emit(dot_opcodes[elements - 2], dst, src0, src1); 118} 119 120void 121vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) 122{ 123 /* The gen6 math instruction ignores the source modifiers -- 124 * swizzle, abs, negate, and at least some parts of the register 125 * region description. Move the source to the corresponding slots 126 * of the destination generally work. 127 */ 128 src_reg expanded = src_reg(this, glsl_type::float_type); 129 emit(BRW_OPCODE_MOV, dst, src); 130 src = expanded; 131 132 emit(opcode, dst, src); 133} 134 135void 136vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) 137{ 138 vec4_instruction *inst = emit(opcode, dst, src); 139 inst->base_mrf = 1; 140 inst->mlen = 1; 141} 142 143void 144vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) 145{ 146 switch (opcode) { 147 case SHADER_OPCODE_RCP: 148 case SHADER_OPCODE_RSQ: 149 case SHADER_OPCODE_SQRT: 150 case SHADER_OPCODE_EXP2: 151 case SHADER_OPCODE_LOG2: 152 case SHADER_OPCODE_SIN: 153 case SHADER_OPCODE_COS: 154 break; 155 default: 156 assert(!"not reached: bad math opcode"); 157 return; 158 } 159 160 if (intel->gen >= 6) { 161 return emit_math1_gen6(opcode, dst, src); 162 } else { 163 return emit_math1_gen4(opcode, dst, src); 164 } 165} 166 167void 168vec4_visitor::emit_math2_gen6(enum opcode opcode, 169 dst_reg dst, src_reg src0, src_reg src1) 170{ 171 src_reg expanded; 172 173 /* The gen6 math instruction ignores the source modifiers -- 174 * swizzle, abs, negate, and at least some parts of the register 175 * region description. Move the sources to temporaries to make it 176 * generally work. 
177 */ 178 179 expanded = src_reg(this, glsl_type::vec4_type); 180 emit(BRW_OPCODE_MOV, dst, src0); 181 src0 = expanded; 182 183 expanded = src_reg(this, glsl_type::vec4_type); 184 emit(BRW_OPCODE_MOV, dst, src1); 185 src1 = expanded; 186 187 emit(opcode, dst, src0, src1); 188} 189 190void 191vec4_visitor::emit_math2_gen4(enum opcode opcode, 192 dst_reg dst, src_reg src0, src_reg src1) 193{ 194 vec4_instruction *inst = emit(opcode, dst, src0, src1); 195 inst->base_mrf = 1; 196 inst->mlen = 2; 197} 198 199void 200vec4_visitor::emit_math(enum opcode opcode, 201 dst_reg dst, src_reg src0, src_reg src1) 202{ 203 assert(opcode == SHADER_OPCODE_POW); 204 205 if (intel->gen >= 6) { 206 return emit_math2_gen6(opcode, dst, src0, src1); 207 } else { 208 return emit_math2_gen4(opcode, dst, src0, src1); 209 } 210} 211 212void 213vec4_visitor::visit_instructions(const exec_list *list) 214{ 215 foreach_iter(exec_list_iterator, iter, *list) { 216 ir_instruction *ir = (ir_instruction *)iter.get(); 217 218 base_ir = ir; 219 ir->accept(this); 220 } 221} 222 223 224static int 225type_size(const struct glsl_type *type) 226{ 227 unsigned int i; 228 int size; 229 230 switch (type->base_type) { 231 case GLSL_TYPE_UINT: 232 case GLSL_TYPE_INT: 233 case GLSL_TYPE_FLOAT: 234 case GLSL_TYPE_BOOL: 235 if (type->is_matrix()) { 236 return type->matrix_columns; 237 } else { 238 /* Regardless of size of vector, it gets a vec4. This is bad 239 * packing for things like floats, but otherwise arrays become a 240 * mess. Hopefully a later pass over the code can pack scalars 241 * down if appropriate. 
242 */ 243 return 1; 244 } 245 case GLSL_TYPE_ARRAY: 246 assert(type->length > 0); 247 return type_size(type->fields.array) * type->length; 248 case GLSL_TYPE_STRUCT: 249 size = 0; 250 for (i = 0; i < type->length; i++) { 251 size += type_size(type->fields.structure[i].type); 252 } 253 return size; 254 case GLSL_TYPE_SAMPLER: 255 /* Samplers take up one slot in UNIFORMS[], but they're baked in 256 * at link time. 257 */ 258 return 1; 259 default: 260 assert(0); 261 return 0; 262 } 263} 264 265int 266vec4_visitor::virtual_grf_alloc(int size) 267{ 268 if (virtual_grf_array_size <= virtual_grf_count) { 269 if (virtual_grf_array_size == 0) 270 virtual_grf_array_size = 16; 271 else 272 virtual_grf_array_size *= 2; 273 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, 274 virtual_grf_array_size); 275 } 276 virtual_grf_sizes[virtual_grf_count] = size; 277 return virtual_grf_count++; 278} 279 280src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) 281{ 282 init(); 283 284 this->file = GRF; 285 this->reg = v->virtual_grf_alloc(type_size(type)); 286 287 if (type->is_array() || type->is_record()) { 288 this->swizzle = BRW_SWIZZLE_NOOP; 289 } else { 290 this->swizzle = swizzle_for_size(type->vector_elements); 291 } 292 293 this->type = brw_type_for_base_type(type); 294} 295 296dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) 297{ 298 init(); 299 300 this->file = GRF; 301 this->reg = v->virtual_grf_alloc(type_size(type)); 302 303 if (type->is_array() || type->is_record()) { 304 this->writemask = WRITEMASK_XYZW; 305 } else { 306 this->writemask = (1 << type->vector_elements) - 1; 307 } 308 309 this->type = brw_type_for_base_type(type); 310} 311 312/* Our support for uniforms is piggy-backed on the struct 313 * gl_fragment_program, because that's where the values actually 314 * get stored, rather than in some global gl_shader_program uniform 315 * store. 
316 */ 317int 318vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) 319{ 320 unsigned int offset = 0; 321 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; 322 323 if (type->is_matrix()) { 324 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 325 type->vector_elements, 326 1); 327 328 for (unsigned int i = 0; i < type->matrix_columns; i++) { 329 offset += setup_uniform_values(loc + offset, column); 330 } 331 332 return offset; 333 } 334 335 switch (type->base_type) { 336 case GLSL_TYPE_FLOAT: 337 case GLSL_TYPE_UINT: 338 case GLSL_TYPE_INT: 339 case GLSL_TYPE_BOOL: 340 for (unsigned int i = 0; i < type->vector_elements; i++) { 341 int slot = this->uniforms * 4 + i; 342 switch (type->base_type) { 343 case GLSL_TYPE_FLOAT: 344 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 345 break; 346 case GLSL_TYPE_UINT: 347 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; 348 break; 349 case GLSL_TYPE_INT: 350 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; 351 break; 352 case GLSL_TYPE_BOOL: 353 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; 354 break; 355 default: 356 assert(!"not reached"); 357 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 358 break; 359 } 360 c->prog_data.param[slot] = &values[i]; 361 } 362 363 for (unsigned int i = type->vector_elements; i < 4; i++) { 364 c->prog_data.param_convert[this->uniforms * 4 + i] = 365 PARAM_CONVERT_ZERO; 366 c->prog_data.param[this->uniforms * 4 + i] = NULL; 367 } 368 369 this->uniform_size[this->uniforms] = type->vector_elements; 370 this->uniforms++; 371 372 return 1; 373 374 case GLSL_TYPE_STRUCT: 375 for (unsigned int i = 0; i < type->length; i++) { 376 offset += setup_uniform_values(loc + offset, 377 type->fields.structure[i].type); 378 } 379 return offset; 380 381 case GLSL_TYPE_ARRAY: 382 for (unsigned int i = 0; i < type->length; i++) { 383 offset += setup_uniform_values(loc + offset, type->fields.array); 384 } 385 return offset; 386 387 
case GLSL_TYPE_SAMPLER: 388 /* The sampler takes up a slot, but we don't use any values from it. */ 389 return 1; 390 391 default: 392 assert(!"not reached"); 393 return 0; 394 } 395} 396 397/* Our support for builtin uniforms is even scarier than non-builtin. 398 * It sits on top of the PROG_STATE_VAR parameters that are 399 * automatically updated from GL context state. 400 */ 401void 402vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) 403{ 404 const ir_state_slot *const slots = ir->state_slots; 405 assert(ir->state_slots != NULL); 406 407 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 408 /* This state reference has already been setup by ir_to_mesa, 409 * but we'll get the same index back here. We can reference 410 * ParameterValues directly, since unlike brw_fs.cpp, we never 411 * add new state references during compile. 412 */ 413 int index = _mesa_add_state_reference(this->vp->Base.Parameters, 414 (gl_state_index *)slots[i].tokens); 415 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; 416 417 this->uniform_size[this->uniforms] = 0; 418 /* Add each of the unique swizzled channels of the element. 419 * This will end up matching the size of the glsl_type of this field. 
420 */ 421 int last_swiz = -1; 422 for (unsigned int j = 0; j < 4; j++) { 423 int swiz = GET_SWZ(slots[i].swizzle, j); 424 if (swiz == last_swiz) 425 break; 426 last_swiz = swiz; 427 428 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; 429 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; 430 this->uniform_size[this->uniforms]++; 431 } 432 this->uniforms++; 433 } 434} 435 436dst_reg * 437vec4_visitor::variable_storage(ir_variable *var) 438{ 439 return (dst_reg *)hash_table_find(this->variable_ht, var); 440} 441 442void 443vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 444{ 445 ir_expression *expr = ir->as_expression(); 446 447 if (expr) { 448 src_reg op[2]; 449 vec4_instruction *inst; 450 451 assert(expr->get_num_operands() <= 2); 452 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 453 assert(expr->operands[i]->type->is_scalar()); 454 455 expr->operands[i]->accept(this); 456 op[i] = this->result; 457 } 458 459 switch (expr->operation) { 460 case ir_unop_logic_not: 461 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); 462 inst->conditional_mod = BRW_CONDITIONAL_Z; 463 break; 464 465 case ir_binop_logic_xor: 466 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); 467 inst->conditional_mod = BRW_CONDITIONAL_NZ; 468 break; 469 470 case ir_binop_logic_or: 471 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); 472 inst->conditional_mod = BRW_CONDITIONAL_NZ; 473 break; 474 475 case ir_binop_logic_and: 476 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); 477 inst->conditional_mod = BRW_CONDITIONAL_NZ; 478 break; 479 480 case ir_unop_f2b: 481 if (intel->gen >= 6) { 482 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); 483 } else { 484 inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); 485 } 486 inst->conditional_mod = BRW_CONDITIONAL_NZ; 487 break; 488 489 case ir_unop_i2b: 490 if (intel->gen >= 6) { 491 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 492 } else { 
493 inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); 494 } 495 inst->conditional_mod = BRW_CONDITIONAL_NZ; 496 break; 497 498 case ir_binop_greater: 499 case ir_binop_gequal: 500 case ir_binop_less: 501 case ir_binop_lequal: 502 case ir_binop_equal: 503 case ir_binop_all_equal: 504 case ir_binop_nequal: 505 case ir_binop_any_nequal: 506 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 507 inst->conditional_mod = 508 brw_conditional_for_comparison(expr->operation); 509 break; 510 511 default: 512 assert(!"not reached"); 513 break; 514 } 515 return; 516 } 517 518 ir->accept(this); 519 520 if (intel->gen >= 6) { 521 vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), 522 this->result, src_reg(1)); 523 inst->conditional_mod = BRW_CONDITIONAL_NZ; 524 } else { 525 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); 526 inst->conditional_mod = BRW_CONDITIONAL_NZ; 527 } 528} 529 530/** 531 * Emit a gen6 IF statement with the comparison folded into the IF 532 * instruction. 
533 */ 534void 535vec4_visitor::emit_if_gen6(ir_if *ir) 536{ 537 ir_expression *expr = ir->condition->as_expression(); 538 539 if (expr) { 540 src_reg op[2]; 541 vec4_instruction *inst; 542 dst_reg temp; 543 544 assert(expr->get_num_operands() <= 2); 545 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 546 assert(expr->operands[i]->type->is_scalar()); 547 548 expr->operands[i]->accept(this); 549 op[i] = this->result; 550 } 551 552 switch (expr->operation) { 553 case ir_unop_logic_not: 554 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 555 inst->conditional_mod = BRW_CONDITIONAL_Z; 556 return; 557 558 case ir_binop_logic_xor: 559 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 560 inst->conditional_mod = BRW_CONDITIONAL_NZ; 561 return; 562 563 case ir_binop_logic_or: 564 temp = dst_reg(this, glsl_type::bool_type); 565 emit(BRW_OPCODE_OR, temp, op[0], op[1]); 566 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 567 inst->conditional_mod = BRW_CONDITIONAL_NZ; 568 return; 569 570 case ir_binop_logic_and: 571 temp = dst_reg(this, glsl_type::bool_type); 572 emit(BRW_OPCODE_AND, temp, op[0], op[1]); 573 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 574 inst->conditional_mod = BRW_CONDITIONAL_NZ; 575 return; 576 577 case ir_unop_f2b: 578 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); 579 inst->conditional_mod = BRW_CONDITIONAL_NZ; 580 return; 581 582 case ir_unop_i2b: 583 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 584 inst->conditional_mod = BRW_CONDITIONAL_NZ; 585 return; 586 587 case ir_binop_greater: 588 case ir_binop_gequal: 589 case ir_binop_less: 590 case ir_binop_lequal: 591 case ir_binop_equal: 592 case ir_binop_all_equal: 593 case ir_binop_nequal: 594 case ir_binop_any_nequal: 595 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 596 inst->conditional_mod = 597 brw_conditional_for_comparison(expr->operation); 598 return; 599 default: 600 assert(!"not 
reached"); 601 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 602 inst->conditional_mod = BRW_CONDITIONAL_NZ; 603 return; 604 } 605 return; 606 } 607 608 ir->condition->accept(this); 609 610 vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), 611 this->result, src_reg(0)); 612 inst->conditional_mod = BRW_CONDITIONAL_NZ; 613} 614 615void 616vec4_visitor::visit(ir_variable *ir) 617{ 618 dst_reg *reg = NULL; 619 620 if (variable_storage(ir)) 621 return; 622 623 switch (ir->mode) { 624 case ir_var_in: 625 reg = new(mem_ctx) dst_reg(ATTR, ir->location); 626 break; 627 628 case ir_var_out: 629 reg = new(mem_ctx) dst_reg(this, ir->type); 630 631 for (int i = 0; i < type_size(ir->type); i++) { 632 output_reg[ir->location + i] = *reg; 633 output_reg[ir->location + i].reg_offset = i; 634 } 635 break; 636 637 case ir_var_auto: 638 case ir_var_temporary: 639 reg = new(mem_ctx) dst_reg(this, ir->type); 640 break; 641 642 case ir_var_uniform: 643 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); 644 645 if (!strncmp(ir->name, "gl_", 3)) { 646 setup_builtin_uniform_values(ir); 647 } else { 648 setup_uniform_values(ir->location, ir->type); 649 } 650 break; 651 652 default: 653 assert(!"not reached"); 654 } 655 656 reg->type = brw_type_for_base_type(ir->type); 657 hash_table_insert(this->variable_ht, reg, ir); 658} 659 660void 661vec4_visitor::visit(ir_loop *ir) 662{ 663 ir_dereference_variable *counter = NULL; 664 665 fail("not yet\n"); 666 667 /* We don't want debugging output to print the whole body of the 668 * loop as the annotation. 
669 */ 670 this->base_ir = NULL; 671 672 if (ir->counter != NULL) 673 counter = new(ir) ir_dereference_variable(ir->counter); 674 675 if (ir->from != NULL) { 676 assert(ir->counter != NULL); 677 678 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 679 680 a->accept(this); 681 delete a; 682 } 683 684 emit(BRW_OPCODE_DO); 685 686 if (ir->to) { 687 ir_expression *e = 688 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 689 counter, ir->to); 690 ir_if *if_stmt = new(ir) ir_if(e); 691 692 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 693 694 if_stmt->then_instructions.push_tail(brk); 695 696 if_stmt->accept(this); 697 698 delete if_stmt; 699 delete e; 700 delete brk; 701 } 702 703 visit_instructions(&ir->body_instructions); 704 705 if (ir->increment) { 706 ir_expression *e = 707 new(ir) ir_expression(ir_binop_add, counter->type, 708 counter, ir->increment); 709 710 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 711 712 a->accept(this); 713 delete a; 714 delete e; 715 } 716 717 emit(BRW_OPCODE_WHILE); 718} 719 720void 721vec4_visitor::visit(ir_loop_jump *ir) 722{ 723 switch (ir->mode) { 724 case ir_loop_jump::jump_break: 725 emit(BRW_OPCODE_BREAK); 726 break; 727 case ir_loop_jump::jump_continue: 728 emit(BRW_OPCODE_CONTINUE); 729 break; 730 } 731} 732 733 734void 735vec4_visitor::visit(ir_function_signature *ir) 736{ 737 assert(0); 738 (void)ir; 739} 740 741void 742vec4_visitor::visit(ir_function *ir) 743{ 744 /* Ignore function bodies other than main() -- we shouldn't see calls to 745 * them since they should all be inlined. 
746 */ 747 if (strcmp(ir->name, "main") == 0) { 748 const ir_function_signature *sig; 749 exec_list empty; 750 751 sig = ir->matching_signature(&empty); 752 753 assert(sig); 754 755 visit_instructions(&sig->body); 756 } 757} 758 759GLboolean 760vec4_visitor::try_emit_sat(ir_expression *ir) 761{ 762 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 763 if (!sat_src) 764 return false; 765 766 sat_src->accept(this); 767 src_reg src = this->result; 768 769 this->result = src_reg(this, ir->type); 770 vec4_instruction *inst; 771 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); 772 inst->saturate = true; 773 774 return true; 775} 776 777void 778vec4_visitor::emit_bool_comparison(unsigned int op, 779 dst_reg dst, src_reg src0, src_reg src1) 780{ 781 /* original gen4 does destination conversion before comparison. */ 782 if (intel->gen < 5) 783 dst.type = src0.type; 784 785 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); 786 inst->conditional_mod = brw_conditional_for_comparison(op); 787 788 dst.type = BRW_REGISTER_TYPE_D; 789 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); 790} 791 792void 793vec4_visitor::visit(ir_expression *ir) 794{ 795 unsigned int operand; 796 src_reg op[Elements(ir->operands)]; 797 src_reg result_src; 798 dst_reg result_dst; 799 vec4_instruction *inst; 800 801 if (try_emit_sat(ir)) 802 return; 803 804 for (operand = 0; operand < ir->get_num_operands(); operand++) { 805 this->result.file = BAD_FILE; 806 ir->operands[operand]->accept(this); 807 if (this->result.file == BAD_FILE) { 808 printf("Failed to get tree for expression operand:\n"); 809 ir->operands[operand]->print(); 810 exit(1); 811 } 812 op[operand] = this->result; 813 814 /* Matrix expression operands should have been broken down to vector 815 * operations already. 
816 */ 817 assert(!ir->operands[operand]->type->is_matrix()); 818 } 819 820 int vector_elements = ir->operands[0]->type->vector_elements; 821 if (ir->operands[1]) { 822 vector_elements = MAX2(vector_elements, 823 ir->operands[1]->type->vector_elements); 824 } 825 826 this->result.file = BAD_FILE; 827 828 /* Storage for our result. Ideally for an assignment we'd be using 829 * the actual storage for the result here, instead. 830 */ 831 result_src = src_reg(this, ir->type); 832 /* convenience for the emit functions below. */ 833 result_dst = dst_reg(result_src); 834 /* If nothing special happens, this is the result. */ 835 this->result = result_src; 836 /* Limit writes to the channels that will be used by result_src later. 837 * This does limit this temp's use as a temporary for multi-instruction 838 * sequences. 839 */ 840 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 841 842 switch (ir->operation) { 843 case ir_unop_logic_not: 844 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 845 * ones complement of the whole register, not just bit 0. 
846 */ 847 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); 848 break; 849 case ir_unop_neg: 850 op[0].negate = !op[0].negate; 851 this->result = op[0]; 852 break; 853 case ir_unop_abs: 854 op[0].abs = true; 855 op[0].negate = false; 856 this->result = op[0]; 857 break; 858 859 case ir_unop_sign: 860 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); 861 862 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 863 inst->conditional_mod = BRW_CONDITIONAL_G; 864 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); 865 inst->predicate = BRW_PREDICATE_NORMAL; 866 867 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 868 inst->conditional_mod = BRW_CONDITIONAL_L; 869 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); 870 inst->predicate = BRW_PREDICATE_NORMAL; 871 872 break; 873 874 case ir_unop_rcp: 875 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); 876 break; 877 878 case ir_unop_exp2: 879 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); 880 break; 881 case ir_unop_log2: 882 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); 883 break; 884 case ir_unop_exp: 885 case ir_unop_log: 886 assert(!"not reached: should be handled by ir_explog_to_explog2"); 887 break; 888 case ir_unop_sin: 889 case ir_unop_sin_reduced: 890 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); 891 break; 892 case ir_unop_cos: 893 case ir_unop_cos_reduced: 894 emit_math(SHADER_OPCODE_COS, result_dst, op[0]); 895 break; 896 897 case ir_unop_dFdx: 898 case ir_unop_dFdy: 899 assert(!"derivatives not valid in vertex shader"); 900 break; 901 902 case ir_unop_noise: 903 assert(!"not reached: should be handled by lower_noise"); 904 break; 905 906 case ir_binop_add: 907 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); 908 break; 909 case ir_binop_sub: 910 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 911 break; 912 913 case ir_binop_mul: 914 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); 915 break; 916 case ir_binop_div: 917 assert(!"not reached: 
should be handled by ir_div_to_mul_rcp"); 918 case ir_binop_mod: 919 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 920 break; 921 922 case ir_binop_less: 923 case ir_binop_greater: 924 case ir_binop_lequal: 925 case ir_binop_gequal: 926 case ir_binop_equal: 927 case ir_binop_nequal: { 928 dst_reg temp = result_dst; 929 /* original gen4 does implicit conversion before comparison. */ 930 if (intel->gen < 5) 931 temp.type = op[0].type; 932 933 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 934 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 935 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); 936 break; 937 } 938 939 case ir_binop_all_equal: 940 /* "==" operator producing a scalar boolean. */ 941 if (ir->operands[0]->type->is_vector() || 942 ir->operands[1]->type->is_vector()) { 943 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 944 inst->conditional_mod = BRW_CONDITIONAL_Z; 945 946 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 947 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 948 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 949 } else { 950 dst_reg temp = result_dst; 951 /* original gen4 does implicit conversion before comparison. */ 952 if (intel->gen < 5) 953 temp.type = op[0].type; 954 955 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 956 inst->conditional_mod = BRW_CONDITIONAL_NZ; 957 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 958 } 959 break; 960 case ir_binop_any_nequal: 961 /* "!=" operator producing a scalar boolean. 
*/ 962 if (ir->operands[0]->type->is_vector() || 963 ir->operands[1]->type->is_vector()) { 964 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 965 inst->conditional_mod = BRW_CONDITIONAL_NZ; 966 967 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 968 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 969 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 970 } else { 971 dst_reg temp = result_dst; 972 /* original gen4 does implicit conversion before comparison. */ 973 if (intel->gen < 5) 974 temp.type = op[0].type; 975 976 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 977 inst->conditional_mod = BRW_CONDITIONAL_NZ; 978 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 979 } 980 break; 981 982 case ir_unop_any: 983 emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 984 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 985 986 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 987 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 988 break; 989 990 case ir_binop_logic_xor: 991 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 992 break; 993 994 case ir_binop_logic_or: 995 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 996 break; 997 998 case ir_binop_logic_and: 999 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1000 break; 1001 1002 case ir_binop_dot: 1003 assert(ir->operands[0]->type->is_vector()); 1004 assert(ir->operands[0]->type == ir->operands[1]->type); 1005 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); 1006 break; 1007 1008 case ir_unop_sqrt: 1009 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); 1010 break; 1011 case ir_unop_rsq: 1012 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); 1013 break; 1014 case ir_unop_i2f: 1015 case ir_unop_i2u: 1016 case ir_unop_u2i: 1017 case ir_unop_u2f: 1018 case ir_unop_b2f: 1019 case ir_unop_b2i: 1020 case ir_unop_f2i: 1021 emit(BRW_OPCODE_MOV, result_dst, op[0]); 1022 break; 1023 case ir_unop_f2b: 1024 case ir_unop_i2b: { 1025 dst_reg temp = result_dst; 1026 /* original gen4 
does implicit conversion before comparison. */ 1027 if (intel->gen < 5) 1028 temp.type = op[0].type; 1029 1030 inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); 1031 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1032 inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); 1033 break; 1034 } 1035 1036 case ir_unop_trunc: 1037 emit(BRW_OPCODE_RNDZ, result_dst, op[0]); 1038 break; 1039 case ir_unop_ceil: 1040 op[0].negate = !op[0].negate; 1041 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1042 this->result.negate = true; 1043 break; 1044 case ir_unop_floor: 1045 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1046 break; 1047 case ir_unop_fract: 1048 inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); 1049 break; 1050 case ir_unop_round_even: 1051 emit(BRW_OPCODE_RNDE, result_dst, op[0]); 1052 break; 1053 1054 case ir_binop_min: 1055 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1056 inst->conditional_mod = BRW_CONDITIONAL_L; 1057 1058 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1059 inst->predicate = BRW_PREDICATE_NORMAL; 1060 break; 1061 case ir_binop_max: 1062 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1063 inst->conditional_mod = BRW_CONDITIONAL_G; 1064 1065 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1066 inst->predicate = BRW_PREDICATE_NORMAL; 1067 break; 1068 1069 case ir_binop_pow: 1070 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); 1071 break; 1072 1073 case ir_unop_bit_not: 1074 inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); 1075 break; 1076 case ir_binop_bit_and: 1077 inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1078 break; 1079 case ir_binop_bit_xor: 1080 inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1081 break; 1082 case ir_binop_bit_or: 1083 inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1084 break; 1085 1086 case ir_binop_lshift: 1087 case ir_binop_rshift: 1088 assert(!"GLSL 1.30 features unsupported"); 1089 break; 1090 1091 case ir_quadop_vector: 1092 
assert(!"not reached: should be handled by lower_quadop_vector"); 1093 break; 1094 } 1095} 1096 1097 1098void 1099vec4_visitor::visit(ir_swizzle *ir) 1100{ 1101 src_reg src; 1102 int i = 0; 1103 int swizzle[4]; 1104 1105 /* Note that this is only swizzles in expressions, not those on the left 1106 * hand side of an assignment, which do write masking. See ir_assignment 1107 * for that. 1108 */ 1109 1110 ir->val->accept(this); 1111 src = this->result; 1112 assert(src.file != BAD_FILE); 1113 1114 for (i = 0; i < ir->type->vector_elements; i++) { 1115 switch (i) { 1116 case 0: 1117 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); 1118 break; 1119 case 1: 1120 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); 1121 break; 1122 case 2: 1123 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); 1124 break; 1125 case 3: 1126 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); 1127 break; 1128 } 1129 } 1130 for (; i < 4; i++) { 1131 /* Replicate the last channel out. */ 1132 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1133 } 1134 1135 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1136 1137 this->result = src; 1138} 1139 1140void 1141vec4_visitor::visit(ir_dereference_variable *ir) 1142{ 1143 dst_reg *reg = variable_storage(ir->var); 1144 1145 if (!reg) { 1146 fail("Failed to find variable storage for %s\n", ir->var->name); 1147 this->result = src_reg(brw_null_reg()); 1148 return; 1149 } 1150 1151 this->result = src_reg(*reg); 1152} 1153 1154void 1155vec4_visitor::visit(ir_dereference_array *ir) 1156{ 1157 ir_constant *constant_index; 1158 src_reg src; 1159 int element_size = type_size(ir->type); 1160 1161 constant_index = ir->array_index->constant_expression_value(); 1162 1163 ir->array->accept(this); 1164 src = this->result; 1165 1166 if (constant_index) { 1167 src.reg_offset += constant_index->value.i[0] * element_size; 1168 } else { 1169#if 0 /* Variable array index */ 1170 /* Variable index array dereference. 
It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         /* Scale the index by the element size so it offsets by whole
          * elements, not single registers.
          */
         index_reg = src_reg(this, glsl_type::float_type);

         emit(BRW_OPCODE_MUL, dst_reg(index_reg),
              this->result, src_reg_for_float(element_size));
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
#endif
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;

   this->result = src;
}

/**
 * Resolves a struct-field dereference by summing the sizes of the fields
 * that precede the named one and adding that to the record's reg_offset.
 */
void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   /* Accumulate the register offset of the requested field. */
   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;

   this->result.reg_offset += offset;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series of conditional
    * moves before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return dst_reg(v->result);
}

/**
 * Emits a register-by-register MOV copy for an assignment of a type
 * larger than a vector (struct, array, matrix), optionally predicated
 * on the assignment's condition.
 */
void
vec4_visitor::emit_block_move(ir_assignment *ir)
{
   ir->rhs->accept(this);
   src_reg src = this->result;

   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).
    */
   dst.writemask = WRITEMASK_XYZW;

   for (int i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
      if (ir->condition)
         inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}

void
vec4_visitor::visit(ir_assignment *ir)
{
   /* Anything bigger than a vector is copied register-by-register. */
   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      emit_block_move(ir);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks.
    */
   int i;

   ir->rhs->accept(this);
   src_reg src = this->result;

   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
          ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   /* Find the swizzle component for the first written channel; unwritten
    * channels below will replicate it (their value is ignored anyway).
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
         break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
         swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
         swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                              swizzles[2], swizzles[3]);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
         inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}


/**
 * Materializes a constant into a freshly allocated temporary, handling
 * structs, arrays, matrices, and plain scalars/vectors with MOVs of
 * immediate values.
 */
void
vec4_visitor::visit(ir_constant *ir)
{
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      src_reg temp_base = src_reg(this, ir->type);
      dst_reg temp = dst_reg(temp_base);

      /* Copy each field's constant value into consecutive registers of
       * the temporary.
       */
      foreach_iter(exec_list_iterator, iter, ir->components) {
         ir_constant *field_value = (ir_constant *)iter.get();
         int size = type_size(field_value->type);

         assert(size > 0);

         field_value->accept(this);
         src_reg src = this->result;

         for (int i = 0; i < (unsigned int)size; i++) {
            emit(BRW_OPCODE_MOV, temp, src);

            src.reg_offset++;
            temp.reg_offset++;
         }
      }
      this->result = temp_base;
      return;
   }

   if (ir->type->is_array()) {
      src_reg temp_base = src_reg(this, ir->type);
      dst_reg temp = dst_reg(temp_base);
      int size = type_size(ir->type->fields.array);

      assert(size > 0);

      /* Copy each element's constant value into consecutive registers. */
      for (unsigned int i = 0; i < ir->type->length; i++) {
         ir->array_elements[i]->accept(this);
         src_reg src = this->result;
         for (int j = 0; j < size; j++) {
            emit(BRW_OPCODE_MOV, temp, src);

            src.reg_offset++;
            temp.reg_offset++;
         }
      }
      this->result = temp_base;
      return;
   }

   if (ir->type->is_matrix()) {
      this->result = src_reg(this, ir->type);
      dst_reg dst = dst_reg(this->result);

      assert(ir->type->base_type == GLSL_TYPE_FLOAT);

      /* One register per column; one immediate MOV per component, using
       * the writemask to place it.
       */
      for (int i = 0; i < ir->type->matrix_columns; i++) {
         for (int j = 0; j < ir->type->vector_elements; j++) {
            dst.writemask = 1 << j;
            emit(BRW_OPCODE_MOV, dst,
                 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
         }
         dst.reg_offset++;
      }
      return;
   }

   this->result = src_reg(this, ir->type);
   dst_reg dst = dst_reg(this->result);

   /* Scalar/vector: one immediate MOV per component. */
   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst.writemask = 1 << i;

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
         break;
      case GLSL_TYPE_INT:
         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
         break;
      case GLSL_TYPE_UINT:
         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
         break;
      case GLSL_TYPE_BOOL:
         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
         break;
      }
   }
}

/* Calls must have been inlined before reaching the vec4 backend. */
void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}

/* Texturing is not supported in the VS backend at this point. */
void
vec4_visitor::visit(ir_texture *ir)
{
assert(!"not reached"); 1437} 1438 1439void 1440vec4_visitor::visit(ir_return *ir) 1441{ 1442 assert(!"not reached"); 1443} 1444 1445void 1446vec4_visitor::visit(ir_discard *ir) 1447{ 1448 assert(!"not reached"); 1449} 1450 1451void 1452vec4_visitor::visit(ir_if *ir) 1453{ 1454 this->base_ir = ir->condition; 1455 ir->condition->accept(this); 1456 assert(this->result.file != BAD_FILE); 1457 1458 /* FINISHME: condcode */ 1459 emit(BRW_OPCODE_IF); 1460 1461 visit_instructions(&ir->then_instructions); 1462 1463 if (!ir->else_instructions.is_empty()) { 1464 this->base_ir = ir->condition; 1465 emit(BRW_OPCODE_ELSE); 1466 1467 visit_instructions(&ir->else_instructions); 1468 } 1469 1470 this->base_ir = ir->condition; 1471 emit(BRW_OPCODE_ENDIF); 1472} 1473 1474int 1475vec4_visitor::emit_vue_header_gen4(int header_mrf) 1476{ 1477 /* Get the position */ 1478 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); 1479 1480 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ 1481 dst_reg ndc = dst_reg(this, glsl_type::vec4_type); 1482 1483 current_annotation = "NDC"; 1484 dst_reg ndc_w = ndc; 1485 ndc_w.writemask = WRITEMASK_W; 1486 src_reg pos_w = pos; 1487 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); 1488 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); 1489 1490 dst_reg ndc_xyz = ndc; 1491 ndc_xyz.writemask = WRITEMASK_XYZ; 1492 1493 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); 1494 1495 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || 1496 c->key.nr_userclip || brw->has_negative_rhw_bug) { 1497 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); 1498 GLuint i; 1499 1500 emit(BRW_OPCODE_MOV, header1, 0u); 1501 1502 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1503 assert(!"finishme: psiz"); 1504 src_reg psiz; 1505 1506 header1.writemask = WRITEMASK_W; 1507 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); 1508 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); 1509 } 1510 1511 for 
(i = 0; i < c->key.nr_userclip; i++) { 1512 vec4_instruction *inst; 1513 1514 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), 1515 pos, src_reg(c->userplane[i])); 1516 inst->conditional_mod = BRW_CONDITIONAL_L; 1517 1518 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); 1519 inst->predicate = BRW_PREDICATE_NORMAL; 1520 } 1521 1522 /* i965 clipping workaround: 1523 * 1) Test for -ve rhw 1524 * 2) If set, 1525 * set ndc = (0,0,0,0) 1526 * set ucp[6] = 1 1527 * 1528 * Later, clipping will detect ucp[6] and ensure the primitive is 1529 * clipped against all fixed planes. 1530 */ 1531 if (brw->has_negative_rhw_bug) { 1532#if 0 1533 /* FINISHME */ 1534 brw_CMP(p, 1535 vec8(brw_null_reg()), 1536 BRW_CONDITIONAL_L, 1537 brw_swizzle1(ndc, 3), 1538 brw_imm_f(0)); 1539 1540 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); 1541 brw_MOV(p, ndc, brw_imm_f(0)); 1542 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1543#endif 1544 } 1545 1546 header1.writemask = WRITEMASK_XYZW; 1547 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); 1548 } else { 1549 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), 1550 BRW_REGISTER_TYPE_UD), 0u); 1551 } 1552 1553 if (intel->gen == 5) { 1554 /* There are 20 DWs (D0-D19) in VUE header on Ironlake: 1555 * dword 0-3 (m1) of the header is indices, point width, clip flags. 1556 * dword 4-7 (m2) is the ndc position (set above) 1557 * dword 8-11 (m3) of the vertex header is the 4D space position 1558 * dword 12-19 (m4,m5) of the vertex header is the user clip distance. 1559 * m6 is a pad so that the vertex element data is aligned 1560 * m7 is the first vertex data we fill. 1561 */ 1562 current_annotation = "NDC"; 1563 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1564 1565 current_annotation = "gl_Position"; 1566 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1567 1568 /* user clip distance. 
*/ 1569 header_mrf += 2; 1570 1571 /* Pad so that vertex element data is aligned. */ 1572 header_mrf++; 1573 } else { 1574 /* There are 8 dwords in VUE header pre-Ironlake: 1575 * dword 0-3 (m1) is indices, point width, clip flags. 1576 * dword 4-7 (m2) is ndc position (set above) 1577 * 1578 * dword 8-11 (m3) is the first vertex data. 1579 */ 1580 current_annotation = "NDC"; 1581 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1582 1583 current_annotation = "gl_Position"; 1584 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1585 } 1586 1587 return header_mrf; 1588} 1589 1590int 1591vec4_visitor::emit_vue_header_gen6(int header_mrf) 1592{ 1593 struct brw_reg reg; 1594 1595 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: 1596 * dword 0-3 (m2) of the header is indices, point width, clip flags. 1597 * dword 4-7 (m3) is the 4D space position 1598 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if 1599 * enabled. 1600 * 1601 * m4 or 6 is the first vertex element data we fill. 
1602 */ 1603 1604 current_annotation = "indices, point width, clip flags"; 1605 reg = brw_message_reg(header_mrf++); 1606 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); 1607 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1608 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), 1609 src_reg(output_reg[VERT_RESULT_PSIZ])); 1610 } 1611 1612 current_annotation = "gl_Position"; 1613 emit(BRW_OPCODE_MOV, 1614 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); 1615 1616 current_annotation = "user clip distances"; 1617 if (c->key.nr_userclip) { 1618 for (int i = 0; i < c->key.nr_userclip; i++) { 1619 struct brw_reg m; 1620 if (i < 4) 1621 m = brw_message_reg(header_mrf); 1622 else 1623 m = brw_message_reg(header_mrf + 1); 1624 1625 emit(BRW_OPCODE_DP4, 1626 dst_reg(brw_writemask(m, 1 << (i & 3))), 1627 src_reg(c->userplane[i])); 1628 } 1629 header_mrf += 2; 1630 } 1631 1632 current_annotation = NULL; 1633 1634 return header_mrf; 1635} 1636 1637static int 1638align_interleaved_urb_mlen(struct brw_context *brw, int mlen) 1639{ 1640 struct intel_context *intel = &brw->intel; 1641 1642 if (intel->gen >= 6) { 1643 /* URB data written (does not include the message header reg) must 1644 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, 1645 * section 5.4.3.2.2: URB_INTERLEAVED. 1646 * 1647 * URB entries are allocated on a multiple of 1024 bits, so an 1648 * extra 128 bits written here to make the end align to 256 is 1649 * no problem. 1650 */ 1651 if ((mlen % 2) != 1) 1652 mlen++; 1653 } 1654 1655 return mlen; 1656} 1657 1658/** 1659 * Generates the VUE payload plus the 1 or 2 URB write instructions to 1660 * complete the VS thread. 1661 * 1662 * The VUE layout is documented in Volume 2a. 
1663 */ 1664void 1665vec4_visitor::emit_urb_writes() 1666{ 1667 int base_mrf = 1; 1668 int mrf = base_mrf; 1669 int urb_entry_size; 1670 1671 /* FINISHME: edgeflag */ 1672 1673 /* First mrf is the g0-based message header containing URB handles and such, 1674 * which is implied in VS_OPCODE_URB_WRITE. 1675 */ 1676 mrf++; 1677 1678 if (intel->gen >= 6) { 1679 mrf = emit_vue_header_gen6(mrf); 1680 } else { 1681 mrf = emit_vue_header_gen4(mrf); 1682 } 1683 1684 int attr; 1685 for (attr = 0; attr < VERT_RESULT_MAX; attr++) { 1686 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1687 continue; 1688 1689 /* This is set up in the VUE header. */ 1690 if (attr == VERT_RESULT_HPOS) 1691 continue; 1692 1693 /* This is loaded into the VUE header, and thus doesn't occupy 1694 * an attribute slot. 1695 */ 1696 if (attr == VERT_RESULT_PSIZ) 1697 continue; 1698 1699 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); 1700 1701 /* If this is MRF 15, we can't fit anything more into this URB 1702 * WRITE. Note that base_mrf of 1 means that MRF 15 is an 1703 * even-numbered amount of URB write data, which will meet 1704 * gen6's requirements for length alignment. 
1705 */ 1706 if (mrf == 15) 1707 break; 1708 } 1709 1710 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); 1711 inst->base_mrf = base_mrf; 1712 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); 1713 inst->eot = true; 1714 1715 urb_entry_size = mrf - base_mrf; 1716 1717 for (; attr < VERT_RESULT_MAX; attr++) { 1718 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1719 continue; 1720 fail("Second URB write not supported.\n"); 1721 break; 1722 } 1723 1724 if (intel->gen == 6) 1725 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; 1726 else 1727 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; 1728} 1729 1730vec4_visitor::vec4_visitor(struct brw_vs_compile *c, 1731 struct gl_shader_program *prog, 1732 struct brw_shader *shader) 1733{ 1734 this->c = c; 1735 this->p = &c->func; 1736 this->brw = p->brw; 1737 this->intel = &brw->intel; 1738 this->ctx = &intel->ctx; 1739 this->prog = prog; 1740 this->shader = shader; 1741 1742 this->mem_ctx = ralloc_context(NULL); 1743 this->failed = false; 1744 1745 this->base_ir = NULL; 1746 this->current_annotation = NULL; 1747 1748 this->c = c; 1749 this->vp = brw->vertex_program; /* FINISHME: change for precompile */ 1750 this->prog_data = &c->prog_data; 1751 1752 this->variable_ht = hash_table_ctor(0, 1753 hash_table_pointer_hash, 1754 hash_table_pointer_compare); 1755 1756 this->virtual_grf_sizes = NULL; 1757 this->virtual_grf_count = 0; 1758 this->virtual_grf_array_size = 0; 1759 1760 this->uniforms = 0; 1761 1762 this->variable_ht = hash_table_ctor(0, 1763 hash_table_pointer_hash, 1764 hash_table_pointer_compare); 1765} 1766 1767vec4_visitor::~vec4_visitor() 1768{ 1769 hash_table_dtor(this->variable_ht); 1770} 1771 1772 1773void 1774vec4_visitor::fail(const char *format, ...) 
1775{ 1776 va_list va; 1777 char *msg; 1778 1779 if (failed) 1780 return; 1781 1782 failed = true; 1783 1784 va_start(va, format); 1785 msg = ralloc_vasprintf(mem_ctx, format, va); 1786 va_end(va); 1787 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); 1788 1789 this->fail_msg = msg; 1790 1791 if (INTEL_DEBUG & DEBUG_VS) { 1792 fprintf(stderr, "%s", msg); 1793 } 1794} 1795 1796} /* namespace brw */ 1797