brw_vec4_visitor.cpp revision 758c3c2b4588f235def48b2f28c0479a70f7c194
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_vec4.h" 25extern "C" { 26#include "main/macros.h" 27#include "program/prog_parameter.h" 28} 29 30namespace brw { 31 32src_reg::src_reg(dst_reg reg) 33{ 34 init(); 35 36 this->file = reg.file; 37 this->reg = reg.reg; 38 this->reg_offset = reg.reg_offset; 39 this->type = reg.type; 40 this->reladdr = reg.reladdr; 41 42 int swizzles[4]; 43 int next_chan = 0; 44 int last = 0; 45 46 for (int i = 0; i < 4; i++) { 47 if (!(reg.writemask & (1 << i))) 48 continue; 49 50 swizzles[next_chan++] = last = i; 51 } 52 53 for (; next_chan < 4; next_chan++) { 54 swizzles[next_chan] = last; 55 } 56 57 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 58 swizzles[2], swizzles[3]); 59} 60 61dst_reg::dst_reg(src_reg reg) 62{ 63 init(); 64 65 this->file = reg.file; 66 this->reg = reg.reg; 67 this->reg_offset = reg.reg_offset; 68 this->type = reg.type; 69 this->writemask = WRITEMASK_XYZW; 70 this->reladdr = reg.reladdr; 71} 72 73vec4_instruction * 74vec4_visitor::emit(enum opcode opcode, dst_reg dst, 75 src_reg src0, src_reg src1, src_reg src2) 76{ 77 vec4_instruction *inst = new(mem_ctx) vec4_instruction(); 78 79 inst->opcode = opcode; 80 inst->dst = dst; 81 inst->src[0] = src0; 82 inst->src[1] = src1; 83 inst->src[2] = src2; 84 inst->ir = this->base_ir; 85 inst->annotation = this->current_annotation; 86 87 this->instructions.push_tail(inst); 88 89 return inst; 90} 91 92 93vec4_instruction * 94vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) 95{ 96 return emit(opcode, dst, src0, src1, src_reg()); 97} 98 99vec4_instruction * 100vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) 101{ 102 assert(dst.writemask != 0); 103 return emit(opcode, dst, src0, src_reg(), src_reg()); 104} 105 106vec4_instruction * 107vec4_visitor::emit(enum opcode opcode) 108{ 109 return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); 110} 111 112void 113vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) 114{ 115 static enum opcode dot_opcodes[] = { 116 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 117 }; 118 119 emit(dot_opcodes[elements - 2], dst, src0, src1); 120} 121 122void 123vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) 124{ 125 /* The gen6 math instruction ignores the source modifiers -- 126 * swizzle, abs, negate, and at least 
    * some parts of the register region description.
    */
   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);

   emit(opcode, dst, temp_src);
}

void
vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
{
   vec4_instruction *inst = emit(opcode, dst, src);
   inst->base_mrf = 1;
   inst->mlen = 1;
}

void
vec4_visitor::emit_math(enum opcode opcode, dst_reg dst, src_reg src)
{
   switch (opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      break;
   default:
      assert(!"not reached: bad math opcode");
      return;
   }

   if (intel->gen >= 6) {
      return emit_math1_gen6(opcode, dst, src);
   } else {
      return emit_math1_gen4(opcode, dst, src);
   }
}

void
vec4_visitor::emit_math2_gen6(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   src_reg expanded;

   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.  Move the sources to temporaries to make it
    * generally work.
    */

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
   src0 = expanded;

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
   src1 = expanded;

   emit(opcode, dst, src0, src1);
}

void
vec4_visitor::emit_math2_gen4(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   vec4_instruction *inst = emit(opcode, dst, src0, src1);
   inst->base_mrf = 1;
   inst->mlen = 2;
}

void
vec4_visitor::emit_math(enum opcode opcode,
                        dst_reg dst, src_reg src0, src_reg src1)
{
   assert(opcode == SHADER_OPCODE_POW);

   if (intel->gen >= 6) {
      return emit_math2_gen6(opcode, dst, src0, src1);
   } else {
      return emit_math2_gen4(opcode, dst, src0, src1);
   }
}

void
vec4_visitor::visit_instructions(const exec_list *list)
{
   foreach_iter(exec_list_iterator, iter, *list) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      base_ir = ir;
      ir->accept(this);
   }
}


static int
type_size(const struct glsl_type *type)
{
   unsigned int i;
   int size;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (type->is_matrix()) {
         return type->matrix_columns;
      } else {
         /* Regardless of size of vector, it gets a vec4.  This is bad
          * packing for things like floats, but otherwise arrays become a
          * mess.  Hopefully a later pass over the code can pack scalars
          * down if appropriate.
          */
         return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
257 */ 258 return 1; 259 default: 260 assert(0); 261 return 0; 262 } 263} 264 265int 266vec4_visitor::virtual_grf_alloc(int size) 267{ 268 if (virtual_grf_array_size <= virtual_grf_count) { 269 if (virtual_grf_array_size == 0) 270 virtual_grf_array_size = 16; 271 else 272 virtual_grf_array_size *= 2; 273 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, 274 virtual_grf_array_size); 275 } 276 virtual_grf_sizes[virtual_grf_count] = size; 277 return virtual_grf_count++; 278} 279 280src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) 281{ 282 init(); 283 284 this->file = GRF; 285 this->reg = v->virtual_grf_alloc(type_size(type)); 286 287 if (type->is_array() || type->is_record()) { 288 this->swizzle = BRW_SWIZZLE_NOOP; 289 } else { 290 this->swizzle = swizzle_for_size(type->vector_elements); 291 } 292 293 this->type = brw_type_for_base_type(type); 294} 295 296dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) 297{ 298 init(); 299 300 this->file = GRF; 301 this->reg = v->virtual_grf_alloc(type_size(type)); 302 303 if (type->is_array() || type->is_record()) { 304 this->writemask = WRITEMASK_XYZW; 305 } else { 306 this->writemask = (1 << type->vector_elements) - 1; 307 } 308 309 this->type = brw_type_for_base_type(type); 310} 311 312/* Our support for uniforms is piggy-backed on the struct 313 * gl_fragment_program, because that's where the values actually 314 * get stored, rather than in some global gl_shader_program uniform 315 * store. 316 */ 317int 318vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) 319{ 320 unsigned int offset = 0; 321 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; 322 323 if (type->is_matrix()) { 324 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 325 type->vector_elements, 326 1); 327 328 for (unsigned int i = 0; i < type->matrix_columns; i++) { 329 offset += setup_uniform_values(loc + offset, column); 330 } 331 332 return offset; 333 } 334 335 switch (type->base_type) { 336 case GLSL_TYPE_FLOAT: 337 case GLSL_TYPE_UINT: 338 case GLSL_TYPE_INT: 339 case GLSL_TYPE_BOOL: 340 for (unsigned int i = 0; i < type->vector_elements; i++) { 341 int slot = this->uniforms * 4 + i; 342 switch (type->base_type) { 343 case GLSL_TYPE_FLOAT: 344 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 345 break; 346 case GLSL_TYPE_UINT: 347 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; 348 break; 349 case GLSL_TYPE_INT: 350 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; 351 break; 352 case GLSL_TYPE_BOOL: 353 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; 354 break; 355 default: 356 assert(!"not reached"); 357 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 358 break; 359 } 360 c->prog_data.param[slot] = &values[i]; 361 } 362 363 for (unsigned int i = type->vector_elements; i < 4; i++) { 364 c->prog_data.param_convert[this->uniforms * 4 + i] = 365 PARAM_CONVERT_ZERO; 366 c->prog_data.param[this->uniforms * 4 + i] = NULL; 367 } 368 369 this->uniform_size[this->uniforms] = type->vector_elements; 370 this->uniforms++; 371 372 return 1; 373 374 case GLSL_TYPE_STRUCT: 375 for (unsigned int i = 0; i < type->length; i++) { 376 offset += setup_uniform_values(loc + offset, 377 type->fields.structure[i].type); 378 } 379 return offset; 380 381 case GLSL_TYPE_ARRAY: 382 for (unsigned int i = 0; i < type->length; i++) { 383 offset += setup_uniform_values(loc + offset, type->fields.array); 384 } 385 return offset; 386 387 case GLSL_TYPE_SAMPLER: 388 /* The sampler takes up 
a slot, but we don't use any values from it. */ 389 return 1; 390 391 default: 392 assert(!"not reached"); 393 return 0; 394 } 395} 396 397/* Our support for builtin uniforms is even scarier than non-builtin. 398 * It sits on top of the PROG_STATE_VAR parameters that are 399 * automatically updated from GL context state. 400 */ 401void 402vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) 403{ 404 const ir_state_slot *const slots = ir->state_slots; 405 assert(ir->state_slots != NULL); 406 407 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 408 /* This state reference has already been setup by ir_to_mesa, 409 * but we'll get the same index back here. We can reference 410 * ParameterValues directly, since unlike brw_fs.cpp, we never 411 * add new state references during compile. 412 */ 413 int index = _mesa_add_state_reference(this->vp->Base.Parameters, 414 (gl_state_index *)slots[i].tokens); 415 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; 416 417 this->uniform_size[this->uniforms] = 0; 418 /* Add each of the unique swizzled channels of the element. 419 * This will end up matching the size of the glsl_type of this field. 420 */ 421 int last_swiz = -1; 422 for (unsigned int j = 0; j < 4; j++) { 423 int swiz = GET_SWZ(slots[i].swizzle, j); 424 if (swiz == last_swiz) 425 break; 426 last_swiz = swiz; 427 428 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; 429 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; 430 this->uniform_size[this->uniforms]++; 431 } 432 this->uniforms++; 433 } 434} 435 436dst_reg * 437vec4_visitor::variable_storage(ir_variable *var) 438{ 439 return (dst_reg *)hash_table_find(this->variable_ht, var); 440} 441 442void 443vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 444{ 445 ir_expression *expr = ir->as_expression(); 446 447 if (expr) { 448 src_reg op[2]; 449 vec4_instruction *inst; 450 451 assert(expr->get_num_operands() <= 2); 452 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 453 assert(expr->operands[i]->type->is_scalar()); 454 455 expr->operands[i]->accept(this); 456 op[i] = this->result; 457 } 458 459 switch (expr->operation) { 460 case ir_unop_logic_not: 461 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); 462 inst->conditional_mod = BRW_CONDITIONAL_Z; 463 break; 464 465 case ir_binop_logic_xor: 466 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); 467 inst->conditional_mod = BRW_CONDITIONAL_NZ; 468 break; 469 470 case ir_binop_logic_or: 471 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); 472 inst->conditional_mod = BRW_CONDITIONAL_NZ; 473 break; 474 475 case ir_binop_logic_and: 476 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); 477 inst->conditional_mod = BRW_CONDITIONAL_NZ; 478 break; 479 480 case ir_unop_f2b: 481 if (intel->gen >= 6) { 482 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); 483 } else { 484 inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); 485 } 486 inst->conditional_mod = BRW_CONDITIONAL_NZ; 487 break; 488 489 case ir_unop_i2b: 490 if (intel->gen >= 6) { 491 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 492 } else { 493 inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); 494 } 495 inst->conditional_mod = BRW_CONDITIONAL_NZ; 496 break; 497 498 case ir_binop_greater: 499 case ir_binop_gequal: 500 case ir_binop_less: 501 case ir_binop_lequal: 502 case ir_binop_equal: 503 case ir_binop_all_equal: 504 case ir_binop_nequal: 505 case ir_binop_any_nequal: 506 inst = emit(BRW_OPCODE_CMP, 
dst_null_cmp(), op[0], op[1]); 507 inst->conditional_mod = 508 brw_conditional_for_comparison(expr->operation); 509 break; 510 511 default: 512 assert(!"not reached"); 513 break; 514 } 515 return; 516 } 517 518 ir->accept(this); 519 520 if (intel->gen >= 6) { 521 vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), 522 this->result, src_reg(1)); 523 inst->conditional_mod = BRW_CONDITIONAL_NZ; 524 } else { 525 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); 526 inst->conditional_mod = BRW_CONDITIONAL_NZ; 527 } 528} 529 530/** 531 * Emit a gen6 IF statement with the comparison folded into the IF 532 * instruction. 533 */ 534void 535vec4_visitor::emit_if_gen6(ir_if *ir) 536{ 537 ir_expression *expr = ir->condition->as_expression(); 538 539 if (expr) { 540 src_reg op[2]; 541 vec4_instruction *inst; 542 dst_reg temp; 543 544 assert(expr->get_num_operands() <= 2); 545 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 546 assert(expr->operands[i]->type->is_scalar() || 547 expr->operation == ir_binop_any_nequal || 548 expr->operation == ir_binop_all_equal); 549 550 expr->operands[i]->accept(this); 551 op[i] = this->result; 552 } 553 554 switch (expr->operation) { 555 case ir_unop_logic_not: 556 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 557 inst->conditional_mod = BRW_CONDITIONAL_Z; 558 return; 559 560 case ir_binop_logic_xor: 561 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 562 inst->conditional_mod = BRW_CONDITIONAL_NZ; 563 return; 564 565 case ir_binop_logic_or: 566 temp = dst_reg(this, glsl_type::bool_type); 567 emit(BRW_OPCODE_OR, temp, op[0], op[1]); 568 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 569 inst->conditional_mod = BRW_CONDITIONAL_NZ; 570 return; 571 572 case ir_binop_logic_and: 573 temp = dst_reg(this, glsl_type::bool_type); 574 emit(BRW_OPCODE_AND, temp, op[0], op[1]); 575 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 576 inst->conditional_mod = BRW_CONDITIONAL_NZ; 577 return; 578 579 case ir_unop_f2b: 580 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); 581 inst->conditional_mod = BRW_CONDITIONAL_NZ; 582 return; 583 584 case ir_unop_i2b: 585 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 586 inst->conditional_mod = BRW_CONDITIONAL_NZ; 587 return; 588 589 case ir_binop_greater: 590 case ir_binop_gequal: 591 case ir_binop_less: 592 case ir_binop_lequal: 593 case ir_binop_equal: 594 case ir_binop_nequal: 595 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 596 inst->conditional_mod = 597 brw_conditional_for_comparison(expr->operation); 598 return; 599 600 case ir_binop_all_equal: 601 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); 602 inst->conditional_mod = BRW_CONDITIONAL_Z; 603 604 inst = emit(BRW_OPCODE_IF); 605 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 606 return; 607 608 case ir_binop_any_nequal: 609 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); 610 inst->conditional_mod = BRW_CONDITIONAL_NZ; 611 612 inst = emit(BRW_OPCODE_IF); 613 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 614 return; 615 616 default: 617 assert(!"not reached"); 618 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 619 inst->conditional_mod = BRW_CONDITIONAL_NZ; 620 return; 621 } 622 return; 623 } 624 625 ir->condition->accept(this); 626 627 vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), 628 this->result, src_reg(0)); 629 inst->conditional_mod = BRW_CONDITIONAL_NZ; 630} 631 632void 
633vec4_visitor::visit(ir_variable *ir) 634{ 635 dst_reg *reg = NULL; 636 637 if (variable_storage(ir)) 638 return; 639 640 switch (ir->mode) { 641 case ir_var_in: 642 reg = new(mem_ctx) dst_reg(ATTR, ir->location); 643 break; 644 645 case ir_var_out: 646 reg = new(mem_ctx) dst_reg(this, ir->type); 647 648 for (int i = 0; i < type_size(ir->type); i++) { 649 output_reg[ir->location + i] = *reg; 650 output_reg[ir->location + i].reg_offset = i; 651 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F; 652 } 653 break; 654 655 case ir_var_auto: 656 case ir_var_temporary: 657 reg = new(mem_ctx) dst_reg(this, ir->type); 658 break; 659 660 case ir_var_uniform: 661 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); 662 663 if (!strncmp(ir->name, "gl_", 3)) { 664 setup_builtin_uniform_values(ir); 665 } else { 666 setup_uniform_values(ir->location, ir->type); 667 } 668 break; 669 670 default: 671 assert(!"not reached"); 672 } 673 674 reg->type = brw_type_for_base_type(ir->type); 675 hash_table_insert(this->variable_ht, reg, ir); 676} 677 678void 679vec4_visitor::visit(ir_loop *ir) 680{ 681 ir_dereference_variable *counter = NULL; 682 683 fail("not yet\n"); 684 685 /* We don't want debugging output to print the whole body of the 686 * loop as the annotation. 687 */ 688 this->base_ir = NULL; 689 690 if (ir->counter != NULL) 691 counter = new(ir) ir_dereference_variable(ir->counter); 692 693 if (ir->from != NULL) { 694 assert(ir->counter != NULL); 695 696 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 697 698 a->accept(this); 699 delete a; 700 } 701 702 emit(BRW_OPCODE_DO); 703 704 if (ir->to) { 705 ir_expression *e = 706 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 707 counter, ir->to); 708 ir_if *if_stmt = new(ir) ir_if(e); 709 710 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 711 712 if_stmt->then_instructions.push_tail(brk); 713 714 if_stmt->accept(this); 715 716 delete if_stmt; 717 delete e; 718 delete brk; 719 } 720 721 visit_instructions(&ir->body_instructions); 722 723 if (ir->increment) { 724 ir_expression *e = 725 new(ir) ir_expression(ir_binop_add, counter->type, 726 counter, ir->increment); 727 728 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 729 730 a->accept(this); 731 delete a; 732 delete e; 733 } 734 735 emit(BRW_OPCODE_WHILE); 736} 737 738void 739vec4_visitor::visit(ir_loop_jump *ir) 740{ 741 switch (ir->mode) { 742 case ir_loop_jump::jump_break: 743 emit(BRW_OPCODE_BREAK); 744 break; 745 case ir_loop_jump::jump_continue: 746 emit(BRW_OPCODE_CONTINUE); 747 break; 748 } 749} 750 751 752void 753vec4_visitor::visit(ir_function_signature *ir) 754{ 755 assert(0); 756 (void)ir; 757} 758 759void 760vec4_visitor::visit(ir_function *ir) 761{ 762 /* Ignore function bodies other than main() -- we shouldn't see calls to 763 * them since they should all be inlined. 
764 */ 765 if (strcmp(ir->name, "main") == 0) { 766 const ir_function_signature *sig; 767 exec_list empty; 768 769 sig = ir->matching_signature(&empty); 770 771 assert(sig); 772 773 visit_instructions(&sig->body); 774 } 775} 776 777GLboolean 778vec4_visitor::try_emit_sat(ir_expression *ir) 779{ 780 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 781 if (!sat_src) 782 return false; 783 784 sat_src->accept(this); 785 src_reg src = this->result; 786 787 this->result = src_reg(this, ir->type); 788 vec4_instruction *inst; 789 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); 790 inst->saturate = true; 791 792 return true; 793} 794 795void 796vec4_visitor::emit_bool_comparison(unsigned int op, 797 dst_reg dst, src_reg src0, src_reg src1) 798{ 799 /* original gen4 does destination conversion before comparison. */ 800 if (intel->gen < 5) 801 dst.type = src0.type; 802 803 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); 804 inst->conditional_mod = brw_conditional_for_comparison(op); 805 806 dst.type = BRW_REGISTER_TYPE_D; 807 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); 808} 809 810void 811vec4_visitor::visit(ir_expression *ir) 812{ 813 unsigned int operand; 814 src_reg op[Elements(ir->operands)]; 815 src_reg result_src; 816 dst_reg result_dst; 817 vec4_instruction *inst; 818 819 if (try_emit_sat(ir)) 820 return; 821 822 for (operand = 0; operand < ir->get_num_operands(); operand++) { 823 this->result.file = BAD_FILE; 824 ir->operands[operand]->accept(this); 825 if (this->result.file == BAD_FILE) { 826 printf("Failed to get tree for expression operand:\n"); 827 ir->operands[operand]->print(); 828 exit(1); 829 } 830 op[operand] = this->result; 831 832 /* Matrix expression operands should have been broken down to vector 833 * operations already. 834 */ 835 assert(!ir->operands[operand]->type->is_matrix()); 836 } 837 838 int vector_elements = ir->operands[0]->type->vector_elements; 839 if (ir->operands[1]) { 840 vector_elements = MAX2(vector_elements, 841 ir->operands[1]->type->vector_elements); 842 } 843 844 this->result.file = BAD_FILE; 845 846 /* Storage for our result. Ideally for an assignment we'd be using 847 * the actual storage for the result here, instead. 848 */ 849 result_src = src_reg(this, ir->type); 850 /* convenience for the emit functions below. */ 851 result_dst = dst_reg(result_src); 852 /* If nothing special happens, this is the result. */ 853 this->result = result_src; 854 /* Limit writes to the channels that will be used by result_src later. 855 * This does limit this temp's use as a temporary for multi-instruction 856 * sequences. 857 */ 858 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 859 860 switch (ir->operation) { 861 case ir_unop_logic_not: 862 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 863 * ones complement of the whole register, not just bit 0. 
864 */ 865 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); 866 break; 867 case ir_unop_neg: 868 op[0].negate = !op[0].negate; 869 this->result = op[0]; 870 break; 871 case ir_unop_abs: 872 op[0].abs = true; 873 op[0].negate = false; 874 this->result = op[0]; 875 break; 876 877 case ir_unop_sign: 878 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); 879 880 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 881 inst->conditional_mod = BRW_CONDITIONAL_G; 882 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); 883 inst->predicate = BRW_PREDICATE_NORMAL; 884 885 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 886 inst->conditional_mod = BRW_CONDITIONAL_L; 887 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); 888 inst->predicate = BRW_PREDICATE_NORMAL; 889 890 break; 891 892 case ir_unop_rcp: 893 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); 894 break; 895 896 case ir_unop_exp2: 897 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); 898 break; 899 case ir_unop_log2: 900 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); 901 break; 902 case ir_unop_exp: 903 case ir_unop_log: 904 assert(!"not reached: should be handled by ir_explog_to_explog2"); 905 break; 906 case ir_unop_sin: 907 case ir_unop_sin_reduced: 908 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); 909 break; 910 case ir_unop_cos: 911 case ir_unop_cos_reduced: 912 emit_math(SHADER_OPCODE_COS, result_dst, op[0]); 913 break; 914 915 case ir_unop_dFdx: 916 case ir_unop_dFdy: 917 assert(!"derivatives not valid in vertex shader"); 918 break; 919 920 case ir_unop_noise: 921 assert(!"not reached: should be handled by lower_noise"); 922 break; 923 924 case ir_binop_add: 925 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); 926 break; 927 case ir_binop_sub: 928 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 929 break; 930 931 case ir_binop_mul: 932 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); 933 break; 934 case ir_binop_div: 935 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 936 case ir_binop_mod: 937 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 938 break; 939 940 case ir_binop_less: 941 case ir_binop_greater: 942 case ir_binop_lequal: 943 case ir_binop_gequal: 944 case ir_binop_equal: 945 case ir_binop_nequal: { 946 dst_reg temp = result_dst; 947 /* original gen4 does implicit conversion before comparison. */ 948 if (intel->gen < 5) 949 temp.type = op[0].type; 950 951 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 952 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 953 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); 954 break; 955 } 956 957 case ir_binop_all_equal: 958 /* "==" operator producing a scalar boolean. */ 959 if (ir->operands[0]->type->is_vector() || 960 ir->operands[1]->type->is_vector()) { 961 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 962 inst->conditional_mod = BRW_CONDITIONAL_Z; 963 964 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 965 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 966 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 967 } else { 968 dst_reg temp = result_dst; 969 /* original gen4 does implicit conversion before comparison. */ 970 if (intel->gen < 5) 971 temp.type = op[0].type; 972 973 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 974 inst->conditional_mod = BRW_CONDITIONAL_NZ; 975 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 976 } 977 break; 978 case ir_binop_any_nequal: 979 /* "!=" operator producing a scalar boolean. 
*/ 980 if (ir->operands[0]->type->is_vector() || 981 ir->operands[1]->type->is_vector()) { 982 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 983 inst->conditional_mod = BRW_CONDITIONAL_NZ; 984 985 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 986 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 987 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 988 } else { 989 dst_reg temp = result_dst; 990 /* original gen4 does implicit conversion before comparison. */ 991 if (intel->gen < 5) 992 temp.type = op[0].type; 993 994 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 995 inst->conditional_mod = BRW_CONDITIONAL_NZ; 996 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 997 } 998 break; 999 1000 case ir_unop_any: 1001 emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 1002 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 1003 1004 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 1005 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 1006 break; 1007 1008 case ir_binop_logic_xor: 1009 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1010 break; 1011 1012 case ir_binop_logic_or: 1013 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1014 break; 1015 1016 case ir_binop_logic_and: 1017 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1018 break; 1019 1020 case ir_binop_dot: 1021 assert(ir->operands[0]->type->is_vector()); 1022 assert(ir->operands[0]->type == ir->operands[1]->type); 1023 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); 1024 break; 1025 1026 case ir_unop_sqrt: 1027 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); 1028 break; 1029 case ir_unop_rsq: 1030 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); 1031 break; 1032 case ir_unop_i2f: 1033 case ir_unop_i2u: 1034 case ir_unop_u2i: 1035 case ir_unop_u2f: 1036 case ir_unop_b2f: 1037 case ir_unop_b2i: 1038 case ir_unop_f2i: 1039 emit(BRW_OPCODE_MOV, result_dst, op[0]); 1040 break; 1041 case ir_unop_f2b: 1042 case ir_unop_i2b: { 1043 dst_reg temp = result_dst; 1044 /* original gen4 does implicit conversion before comparison. 
*/ 1045 if (intel->gen < 5) 1046 temp.type = op[0].type; 1047 1048 inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); 1049 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1050 inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); 1051 break; 1052 } 1053 1054 case ir_unop_trunc: 1055 emit(BRW_OPCODE_RNDZ, result_dst, op[0]); 1056 break; 1057 case ir_unop_ceil: 1058 op[0].negate = !op[0].negate; 1059 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1060 this->result.negate = true; 1061 break; 1062 case ir_unop_floor: 1063 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1064 break; 1065 case ir_unop_fract: 1066 inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); 1067 break; 1068 case ir_unop_round_even: 1069 emit(BRW_OPCODE_RNDE, result_dst, op[0]); 1070 break; 1071 1072 case ir_binop_min: 1073 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1074 inst->conditional_mod = BRW_CONDITIONAL_L; 1075 1076 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1077 inst->predicate = BRW_PREDICATE_NORMAL; 1078 break; 1079 case ir_binop_max: 1080 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1081 inst->conditional_mod = BRW_CONDITIONAL_G; 1082 1083 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1084 inst->predicate = BRW_PREDICATE_NORMAL; 1085 break; 1086 1087 case ir_binop_pow: 1088 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); 1089 break; 1090 1091 case ir_unop_bit_not: 1092 inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); 1093 break; 1094 case ir_binop_bit_and: 1095 inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1096 break; 1097 case ir_binop_bit_xor: 1098 inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1099 break; 1100 case ir_binop_bit_or: 1101 inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1102 break; 1103 1104 case ir_binop_lshift: 1105 case ir_binop_rshift: 1106 assert(!"GLSL 1.30 features unsupported"); 1107 break; 1108 1109 case ir_quadop_vector: 1110 assert(!"not reached: should be handled by lower_quadop_vector"); 1111 break; 1112 } 1113} 1114 1115 1116void 1117vec4_visitor::visit(ir_swizzle *ir) 1118{ 1119 src_reg src; 1120 int i = 0; 1121 int swizzle[4]; 1122 1123 /* Note that this is only swizzles in expressions, not those on the left 1124 * hand side of an assignment, which do write masking. See ir_assignment 1125 * for that. 1126 */ 1127 1128 ir->val->accept(this); 1129 src = this->result; 1130 assert(src.file != BAD_FILE); 1131 1132 for (i = 0; i < ir->type->vector_elements; i++) { 1133 switch (i) { 1134 case 0: 1135 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); 1136 break; 1137 case 1: 1138 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); 1139 break; 1140 case 2: 1141 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); 1142 break; 1143 case 3: 1144 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); 1145 break; 1146 } 1147 } 1148 for (; i < 4; i++) { 1149 /* Replicate the last channel out. 
*/ 1150 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1151 } 1152 1153 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1154 1155 this->result = src; 1156} 1157 1158void 1159vec4_visitor::visit(ir_dereference_variable *ir) 1160{ 1161 const struct glsl_type *type = ir->type; 1162 dst_reg *reg = variable_storage(ir->var); 1163 1164 if (!reg) { 1165 fail("Failed to find variable storage for %s\n", ir->var->name); 1166 this->result = src_reg(brw_null_reg()); 1167 return; 1168 } 1169 1170 this->result = src_reg(*reg); 1171 1172 if (type->is_scalar() || type->is_vector() || type->is_matrix()) 1173 this->result.swizzle = swizzle_for_size(type->vector_elements); 1174} 1175 1176void 1177vec4_visitor::visit(ir_dereference_array *ir) 1178{ 1179 ir_constant *constant_index; 1180 src_reg src; 1181 int element_size = type_size(ir->type); 1182 1183 constant_index = ir->array_index->constant_expression_value(); 1184 1185 ir->array->accept(this); 1186 src = this->result; 1187 1188 if (constant_index) { 1189 src.reg_offset += constant_index->value.i[0] * element_size; 1190 } else { 1191 /* Variable index array dereference. It eats the "vec4" of the 1192 * base of the array and an index that offsets the Mesa register 1193 * index. 1194 */ 1195 ir->array_index->accept(this); 1196 1197 src_reg index_reg; 1198 1199 if (element_size == 1) { 1200 index_reg = this->result; 1201 } else { 1202 index_reg = src_reg(this, glsl_type::int_type); 1203 1204 emit(BRW_OPCODE_MUL, dst_reg(index_reg), 1205 this->result, src_reg(element_size)); 1206 } 1207 1208 if (src.reladdr) { 1209 src_reg temp = src_reg(this, glsl_type::int_type); 1210 1211 emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg); 1212 1213 index_reg = temp; 1214 } 1215 1216 src.reladdr = ralloc(mem_ctx, src_reg); 1217 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1218 } 1219 1220 /* If the type is smaller than a vec4, replicate the last channel out. */ 1221 if (ir->type->is_scalar() || ir->type->is_vector()) 1222 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1223 else 1224 src.swizzle = BRW_SWIZZLE_NOOP; 1225 src.type = brw_type_for_base_type(ir->type); 1226 1227 this->result = src; 1228} 1229 1230void 1231vec4_visitor::visit(ir_dereference_record *ir) 1232{ 1233 unsigned int i; 1234 const glsl_type *struct_type = ir->record->type; 1235 int offset = 0; 1236 1237 ir->record->accept(this); 1238 1239 for (i = 0; i < struct_type->length; i++) { 1240 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 1241 break; 1242 offset += type_size(struct_type->fields.structure[i].type); 1243 } 1244 1245 /* If the type is smaller than a vec4, replicate the last channel out. */ 1246 if (ir->type->is_scalar() || ir->type->is_vector()) 1247 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1248 else 1249 this->result.swizzle = BRW_SWIZZLE_NOOP; 1250 this->result.type = brw_type_for_base_type(ir->type); 1251 1252 this->result.reg_offset += offset; 1253} 1254 1255/** 1256 * We want to be careful in assignment setup to hit the actual storage 1257 * instead of potentially using a temporary like we might with the 1258 * ir_dereference handler. 1259 */ 1260static dst_reg 1261get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) 1262{ 1263 /* The LHS must be a dereference. If the LHS is a variable indexed array 1264 * access of a vector, it must be separated into a series conditional moves 1265 * before reaching this point (see ir_vec_index_to_cond_assign). 
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return dst_reg(v->result);
}

void
vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
                              const struct glsl_type *type, bool predicated)
{
   if (type->base_type == GLSL_TYPE_STRUCT) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.structure[i].type, predicated);
      }
      return;
   }

   if (type->is_array()) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.array, predicated);
      }
      return;
   }

   if (type->is_matrix()) {
      const struct glsl_type *vec_type;

      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                         type->vector_elements, 1);

      for (int i = 0; i < type->matrix_columns; i++) {
         emit_block_move(dst, src, vec_type, predicated);
      }
      return;
   }

   assert(type->is_scalar() || type->is_vector());

   dst->type = brw_type_for_base_type(type);
   src->type = dst->type;

   dst->writemask = (1 << type->vector_elements) - 1;

   /* Do we need to worry about swizzling a swizzle? */
   assert(src->swizzle == BRW_SWIZZLE_NOOP);
   src->swizzle = swizzle_for_size(type->vector_elements);

   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
   if (predicated)
      inst->predicate = BRW_PREDICATE_NORMAL;

   dst->reg_offset++;
   src->reg_offset++;
}

void
vec4_visitor::visit(ir_assignment *ir)
{
   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      ir->rhs->accept(this);
      src_reg src = this->result;

      if (ir->condition) {
         emit_bool_to_cond_code(ir->condition);
      }

      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;

   ir->rhs->accept(this);
   src_reg src = this->result;

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
          ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
         break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
1373 */ 1374 for (int i = 0; i < 4; i++) { 1375 if (dst.writemask & (1 << i)) 1376 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); 1377 else 1378 swizzles[i] = first_enabled_chan; 1379 } 1380 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 1381 swizzles[2], swizzles[3]); 1382 1383 if (ir->condition) { 1384 emit_bool_to_cond_code(ir->condition); 1385 } 1386 1387 for (i = 0; i < type_size(ir->lhs->type); i++) { 1388 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); 1389 1390 if (ir->condition) 1391 inst->predicate = BRW_PREDICATE_NORMAL; 1392 1393 dst.reg_offset++; 1394 src.reg_offset++; 1395 } 1396} 1397 1398void 1399vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) 1400{ 1401 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 1402 foreach_list(node, &ir->components) { 1403 ir_constant *field_value = (ir_constant *)node; 1404 1405 emit_constant_values(dst, field_value); 1406 } 1407 return; 1408 } 1409 1410 if (ir->type->is_array()) { 1411 for (unsigned int i = 0; i < ir->type->length; i++) { 1412 emit_constant_values(dst, ir->array_elements[i]); 1413 } 1414 return; 1415 } 1416 1417 if (ir->type->is_matrix()) { 1418 for (int i = 0; i < ir->type->matrix_columns; i++) { 1419 for (int j = 0; j < ir->type->vector_elements; j++) { 1420 dst->writemask = 1 << j; 1421 dst->type = BRW_REGISTER_TYPE_F; 1422 1423 emit(BRW_OPCODE_MOV, *dst, 1424 src_reg(ir->value.f[i * ir->type->vector_elements + j])); 1425 } 1426 dst->reg_offset++; 1427 } 1428 return; 1429 } 1430 1431 for (int i = 0; i < ir->type->vector_elements; i++) { 1432 dst->writemask = 1 << i; 1433 dst->type = brw_type_for_base_type(ir->type); 1434 1435 switch (ir->type->base_type) { 1436 case GLSL_TYPE_FLOAT: 1437 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i])); 1438 break; 1439 case GLSL_TYPE_INT: 1440 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i])); 1441 break; 1442 case GLSL_TYPE_UINT: 1443 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i])); 1444 break; 1445 case GLSL_TYPE_BOOL: 1446 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i])); 1447 break; 1448 default: 1449 assert(!"Non-float/uint/int/bool constant"); 1450 break; 1451 } 1452 } 1453 dst->reg_offset++; 1454} 1455 1456void 1457vec4_visitor::visit(ir_constant *ir) 1458{ 1459 dst_reg dst = dst_reg(this, ir->type); 1460 this->result = src_reg(dst); 1461 1462 emit_constant_values(&dst, ir); 1463} 1464 1465void 1466vec4_visitor::visit(ir_call *ir) 1467{ 1468 assert(!"not reached"); 1469} 1470 1471void 1472vec4_visitor::visit(ir_texture *ir) 1473{ 1474 assert(!"not reached"); 1475} 1476 1477void 1478vec4_visitor::visit(ir_return *ir) 1479{ 1480 assert(!"not reached"); 1481} 1482 1483void 1484vec4_visitor::visit(ir_discard *ir) 1485{ 1486 assert(!"not reached"); 1487} 1488 1489void 1490vec4_visitor::visit(ir_if *ir) 1491{ 1492 /* Don't point the annotation at the if statement, because then it plus 1493 * the then and else blocks get printed. 
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);
      vec4_instruction *inst = emit(BRW_OPCODE_IF);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->then_instructions);

   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}

int
vec4_visitor::emit_vue_header_gen4(int header_mrf)
{
   /* Get the position */
   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);

   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);

   current_annotation = "NDC";
   dst_reg ndc_w = ndc;
   ndc_w.writemask = WRITEMASK_W;
   src_reg pos_w = pos;
   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);

   dst_reg ndc_xyz = ndc;
   ndc_xyz.writemask = WRITEMASK_XYZ;

   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));

   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
       c->key.nr_userclip || brw->has_negative_rhw_bug) {
      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
      GLuint i;

      emit(BRW_OPCODE_MOV, header1, 0u);

      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
         assert(!"finishme: psiz");
         src_reg psiz;

         header1.writemask = WRITEMASK_W;
         emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
         emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
      }

      for (i = 0; i < c->key.nr_userclip; i++) {
         vec4_instruction *inst;

         inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
                     pos, src_reg(c->userplane[i]));
         inst->conditional_mod = BRW_CONDITIONAL_L;

         /* Set the clip flag for this plane only when the vertex is
          * outside of it (distance < 0), by predicating the OR on the
          * DP4's condition code.
          */
         inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
         inst->predicate = BRW_PREDICATE_NORMAL;
      }

      /* i965 clipping workaround:
       * 1) Test for -ve rhw
       * 2) If set,
       *      set ndc = (0,0,0,0)
       *      set ucp[6] = 1
       *
       * Later, clipping will detect ucp[6] and ensure the primitive is
       * clipped against all fixed planes.
       */
      if (brw->has_negative_rhw_bug) {
#if 0
         /* FINISHME */
         brw_CMP(p,
                 vec8(brw_null_reg()),
                 BRW_CONDITIONAL_L,
                 brw_swizzle1(ndc, 3),
                 brw_imm_f(0));

         brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
         brw_MOV(p, ndc, brw_imm_f(0));
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
#endif
      }

      header1.writemask = WRITEMASK_XYZW;
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
   } else {
      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
                                  BRW_REGISTER_TYPE_UD), 0u);
   }

   if (intel->gen == 5) {
      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
       * dword 0-3 (m1) of the header is indices, point width, clip flags.
       * dword 4-7 (m2) is the ndc position (set above)
       * dword 8-11 (m3) of the vertex header is the 4D space position
       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
       * m6 is a pad so that the vertex element data is aligned
       * m7 is the first vertex data we fill.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);

      /* user clip distance. */
      header_mrf += 2;

      /* Pad so that vertex element data is aligned. */
      header_mrf++;
   } else {
      /* There are 8 dwords in VUE header pre-Ironlake:
       * dword 0-3 (m1) is indices, point width, clip flags.
       * dword 4-7 (m2) is ndc position (set above)
       *
       * dword 8-11 (m3) is the first vertex data.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
   }

   return header_mrf;
}

int
vec4_visitor::emit_vue_header_gen6(int header_mrf)
{
   struct brw_reg reg;

   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
    * dword 0-3 (m2) of the header is indices, point width, clip flags.
    * dword 4-7 (m3) is the 4D space position
    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
    * enabled.
    *
    * m4 or 6 is the first vertex element data we fill.
    */

   current_annotation = "indices, point width, clip flags";
   reg = brw_message_reg(header_mrf++);
   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
           src_reg(output_reg[VERT_RESULT_PSIZ]));
   }

   current_annotation = "gl_Position";
   emit(BRW_OPCODE_MOV,
        brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));

   current_annotation = "user clip distances";
   if (c->key.nr_userclip) {
      for (int i = 0; i < c->key.nr_userclip; i++) {
         struct brw_reg m;
         if (i < 4)
            m = brw_message_reg(header_mrf);
         else
            m = brw_message_reg(header_mrf + 1);

         /* Each clip distance is the dot product of the position with
          * the user clip plane.
          */
         emit(BRW_OPCODE_DP4,
              dst_reg(brw_writemask(m, 1 << (i & 3))),
              src_reg(output_reg[VERT_RESULT_HPOS]),
              src_reg(c->userplane[i]));
      }
      header_mrf += 2;
   }

   current_annotation = NULL;

   return header_mrf;
}

static int
align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen >= 6) {
      /* URB data written (does not include the message header reg) must
       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
       * section 5.4.3.2.2: URB_INTERLEAVED.
       *
       * URB entries are allocated on a multiple of 1024 bits, so an
       * extra 128 bits written here to make the end align to 256 is
       * no problem.
       */
      if ((mlen % 2) != 1)
         mlen++;
   }

   return mlen;
}

/**
 * Generates the VUE payload plus the 1 or 2 URB write instructions to
 * complete the VS thread.
 *
 * The VUE layout is documented in Volume 2a.
 */
void
vec4_visitor::emit_urb_writes()
{
   /* MRF 0 is reserved for the debugger, so start with message header
    * in MRF 1.
    */
   int base_mrf = 1;
   int mrf = base_mrf;
   int urb_entry_size;
   uint64_t outputs_remaining = c->prog_data.outputs_written;
   /* In the process of generating our URB write message contents, we
    * may need to unspill a register or load from an array.  Those
    * reads would use MRFs 14-15.
1721 */ 1722 int max_usable_mrf = 13; 1723 1724 /* FINISHME: edgeflag */ 1725 1726 /* First mrf is the g0-based message header containing URB handles and such, 1727 * which is implied in VS_OPCODE_URB_WRITE. 1728 */ 1729 mrf++; 1730 1731 if (intel->gen >= 6) { 1732 mrf = emit_vue_header_gen6(mrf); 1733 } else { 1734 mrf = emit_vue_header_gen4(mrf); 1735 } 1736 1737 /* Set up the VUE data for the first URB write */ 1738 int attr; 1739 for (attr = 0; attr < VERT_RESULT_MAX; attr++) { 1740 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1741 continue; 1742 1743 outputs_remaining &= ~BITFIELD64_BIT(attr); 1744 1745 /* This is set up in the VUE header. */ 1746 if (attr == VERT_RESULT_HPOS) 1747 continue; 1748 1749 /* This is loaded into the VUE header, and thus doesn't occupy 1750 * an attribute slot. 1751 */ 1752 if (attr == VERT_RESULT_PSIZ) 1753 continue; 1754 1755 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); 1756 1757 /* If this was MRF 15, we can't fit anything more into this URB 1758 * WRITE. Note that base_mrf of 1 means that MRF 15 is an 1759 * even-numbered amount of URB write data, which will meet 1760 * gen6's requirements for length alignment. 1761 */ 1762 if (mrf > max_usable_mrf) { 1763 attr++; 1764 break; 1765 } 1766 } 1767 1768 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); 1769 inst->base_mrf = base_mrf; 1770 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); 1771 inst->eot = !outputs_remaining; 1772 1773 urb_entry_size = mrf - base_mrf; 1774 1775 /* Optional second URB write */ 1776 if (outputs_remaining) { 1777 mrf = base_mrf + 1; 1778 1779 for (; attr < VERT_RESULT_MAX; attr++) { 1780 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1781 continue; 1782 1783 assert(mrf < max_usable_mrf); 1784 1785 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); 1786 } 1787 1788 inst = emit(VS_OPCODE_URB_WRITE); 1789 inst->base_mrf = base_mrf; 1790 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); 1791 inst->eot = true; 1792 /* URB destination offset. In the previous write, we got MRFs 1793 * 2-13 minus the one header MRF, so 12 regs. URB offset is in 1794 * URB row increments, and each of our MRFs is half of one of 1795 * those, since we're doing interleaved writes. 
       */
      inst->offset = (max_usable_mrf - base_mrf) / 2;

      urb_entry_size += mrf - base_mrf;
   }

   if (intel->gen == 6)
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
   else
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
}

vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
                           struct gl_shader_program *prog,
                           struct brw_shader *shader)
{
   this->c = c;
   this->p = &c->func;
   this->brw = p->brw;
   this->intel = &brw->intel;
   this->ctx = &intel->ctx;
   this->prog = prog;
   this->shader = shader;

   this->mem_ctx = ralloc_context(NULL);
   this->failed = false;

   this->base_ir = NULL;
   this->current_annotation = NULL;

   this->vp = brw->vertex_program; /* FINISHME: change for precompile */
   this->prog_data = &c->prog_data;

   this->variable_ht = hash_table_ctor(0,
                                       hash_table_pointer_hash,
                                       hash_table_pointer_compare);

   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_array_size = 0;

   this->uniforms = 0;
}

vec4_visitor::~vec4_visitor()
{
   hash_table_dtor(this->variable_ht);
}


void
vec4_visitor::fail(const char *format, ...)
{
   va_list va;
   char *msg;

   if (failed)
      return;

   failed = true;

   va_start(va, format);
   msg = ralloc_vasprintf(mem_ctx, format, va);
   va_end(va);
   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);

   this->fail_msg = msg;

   if (INTEL_DEBUG & DEBUG_VS) {
      fprintf(stderr, "%s", msg);
   }
}

} /* namespace brw */