brw_vec4_visitor.cpp revision 6408b0295f5c8be6fea891a025d79752484721b6
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 */ 23 24#include "brw_vec4.h" 25extern "C" { 26#include "main/macros.h" 27#include "program/prog_parameter.h" 28} 29 30namespace brw { 31 32src_reg::src_reg(dst_reg reg) 33{ 34 init(); 35 36 this->file = reg.file; 37 this->reg = reg.reg; 38 this->reg_offset = reg.reg_offset; 39 this->type = reg.type; 40 this->reladdr = reg.reladdr; 41 42 int swizzles[4]; 43 int next_chan = 0; 44 int last = 0; 45 46 for (int i = 0; i < 4; i++) { 47 if (!(reg.writemask & (1 << i))) 48 continue; 49 50 swizzles[next_chan++] = last = i; 51 } 52 53 for (; next_chan < 4; next_chan++) { 54 swizzles[next_chan] = last; 55 } 56 57 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 58 swizzles[2], swizzles[3]); 59} 60 61dst_reg::dst_reg(src_reg reg) 62{ 63 init(); 64 65 this->file = reg.file; 66 this->reg = reg.reg; 67 this->reg_offset = reg.reg_offset; 68 this->type = reg.type; 69 this->writemask = WRITEMASK_XYZW; 70 this->reladdr = reg.reladdr; 71} 72 73vec4_instruction * 74vec4_visitor::emit(enum opcode opcode, dst_reg dst, 75 src_reg src0, src_reg src1, src_reg src2) 76{ 77 vec4_instruction *inst = new(mem_ctx) vec4_instruction(); 78 79 inst->opcode = opcode; 80 inst->dst = dst; 81 inst->src[0] = src0; 82 inst->src[1] = src1; 83 inst->src[2] = src2; 84 inst->ir = this->base_ir; 85 inst->annotation = this->current_annotation; 86 87 this->instructions.push_tail(inst); 88 89 return inst; 90} 91 92 93vec4_instruction * 94vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) 95{ 96 return emit(opcode, dst, src0, src1, src_reg()); 97} 98 99vec4_instruction * 100vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) 101{ 102 assert(dst.writemask != 0); 103 return emit(opcode, dst, src0, src_reg(), src_reg()); 104} 105 106vec4_instruction * 107vec4_visitor::emit(enum opcode opcode) 108{ 109 return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); 110} 111 112void 113vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) 
114{ 115 static enum opcode dot_opcodes[] = { 116 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 117 }; 118 119 emit(dot_opcodes[elements - 2], dst, src0, src1); 120} 121 122void 123vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) 124{ 125 /* The gen6 math instruction ignores the source modifiers -- 126 * swizzle, abs, negate, and at least some parts of the register 127 * region description. 128 */ 129 src_reg temp_src = src_reg(this, glsl_type::vec4_type); 130 emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); 131 132 emit(opcode, dst, temp_src); 133} 134 135void 136vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) 137{ 138 vec4_instruction *inst = emit(opcode, dst, src); 139 inst->base_mrf = 1; 140 inst->mlen = 1; 141} 142 143void 144vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) 145{ 146 switch (opcode) { 147 case SHADER_OPCODE_RCP: 148 case SHADER_OPCODE_RSQ: 149 case SHADER_OPCODE_SQRT: 150 case SHADER_OPCODE_EXP2: 151 case SHADER_OPCODE_LOG2: 152 case SHADER_OPCODE_SIN: 153 case SHADER_OPCODE_COS: 154 break; 155 default: 156 assert(!"not reached: bad math opcode"); 157 return; 158 } 159 160 if (intel->gen >= 6) { 161 return emit_math1_gen6(opcode, dst, src); 162 } else { 163 return emit_math1_gen4(opcode, dst, src); 164 } 165} 166 167void 168vec4_visitor::emit_math2_gen6(enum opcode opcode, 169 dst_reg dst, src_reg src0, src_reg src1) 170{ 171 src_reg expanded; 172 173 /* The gen6 math instruction ignores the source modifiers -- 174 * swizzle, abs, negate, and at least some parts of the register 175 * region description. Move the sources to temporaries to make it 176 * generally work. 
177 */ 178 179 expanded = src_reg(this, glsl_type::vec4_type); 180 emit(BRW_OPCODE_MOV, dst, src0); 181 src0 = expanded; 182 183 expanded = src_reg(this, glsl_type::vec4_type); 184 emit(BRW_OPCODE_MOV, dst, src1); 185 src1 = expanded; 186 187 emit(opcode, dst, src0, src1); 188} 189 190void 191vec4_visitor::emit_math2_gen4(enum opcode opcode, 192 dst_reg dst, src_reg src0, src_reg src1) 193{ 194 vec4_instruction *inst = emit(opcode, dst, src0, src1); 195 inst->base_mrf = 1; 196 inst->mlen = 2; 197} 198 199void 200vec4_visitor::emit_math(enum opcode opcode, 201 dst_reg dst, src_reg src0, src_reg src1) 202{ 203 assert(opcode == SHADER_OPCODE_POW); 204 205 if (intel->gen >= 6) { 206 return emit_math2_gen6(opcode, dst, src0, src1); 207 } else { 208 return emit_math2_gen4(opcode, dst, src0, src1); 209 } 210} 211 212void 213vec4_visitor::visit_instructions(const exec_list *list) 214{ 215 foreach_iter(exec_list_iterator, iter, *list) { 216 ir_instruction *ir = (ir_instruction *)iter.get(); 217 218 base_ir = ir; 219 ir->accept(this); 220 } 221} 222 223 224static int 225type_size(const struct glsl_type *type) 226{ 227 unsigned int i; 228 int size; 229 230 switch (type->base_type) { 231 case GLSL_TYPE_UINT: 232 case GLSL_TYPE_INT: 233 case GLSL_TYPE_FLOAT: 234 case GLSL_TYPE_BOOL: 235 if (type->is_matrix()) { 236 return type->matrix_columns; 237 } else { 238 /* Regardless of size of vector, it gets a vec4. This is bad 239 * packing for things like floats, but otherwise arrays become a 240 * mess. Hopefully a later pass over the code can pack scalars 241 * down if appropriate. 
242 */ 243 return 1; 244 } 245 case GLSL_TYPE_ARRAY: 246 assert(type->length > 0); 247 return type_size(type->fields.array) * type->length; 248 case GLSL_TYPE_STRUCT: 249 size = 0; 250 for (i = 0; i < type->length; i++) { 251 size += type_size(type->fields.structure[i].type); 252 } 253 return size; 254 case GLSL_TYPE_SAMPLER: 255 /* Samplers take up one slot in UNIFORMS[], but they're baked in 256 * at link time. 257 */ 258 return 1; 259 default: 260 assert(0); 261 return 0; 262 } 263} 264 265int 266vec4_visitor::virtual_grf_alloc(int size) 267{ 268 if (virtual_grf_array_size <= virtual_grf_count) { 269 if (virtual_grf_array_size == 0) 270 virtual_grf_array_size = 16; 271 else 272 virtual_grf_array_size *= 2; 273 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, 274 virtual_grf_array_size); 275 } 276 virtual_grf_sizes[virtual_grf_count] = size; 277 return virtual_grf_count++; 278} 279 280src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) 281{ 282 init(); 283 284 this->file = GRF; 285 this->reg = v->virtual_grf_alloc(type_size(type)); 286 287 if (type->is_array() || type->is_record()) { 288 this->swizzle = BRW_SWIZZLE_NOOP; 289 } else { 290 this->swizzle = swizzle_for_size(type->vector_elements); 291 } 292 293 this->type = brw_type_for_base_type(type); 294} 295 296dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) 297{ 298 init(); 299 300 this->file = GRF; 301 this->reg = v->virtual_grf_alloc(type_size(type)); 302 303 if (type->is_array() || type->is_record()) { 304 this->writemask = WRITEMASK_XYZW; 305 } else { 306 this->writemask = (1 << type->vector_elements) - 1; 307 } 308 309 this->type = brw_type_for_base_type(type); 310} 311 312/* Our support for uniforms is piggy-backed on the struct 313 * gl_fragment_program, because that's where the values actually 314 * get stored, rather than in some global gl_shader_program uniform 315 * store. 
 */
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   /* Returns the number of vec4 uniform slots consumed by \p type,
    * recording per-channel param pointers and convert flags in prog_data.
    */
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   if (type->is_matrix()) {
      /* A matrix is laid out as matrix_columns consecutive float vectors. */
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* Point each used channel of this uniform slot at its backing value
       * and mark how the float-stored parameter must be reinterpreted.
       */
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         int slot = this->uniforms * 4 + i;
         switch (type->base_type) {
         case GLSL_TYPE_FLOAT:
            c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
            break;
         case GLSL_TYPE_UINT:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
            break;
         case GLSL_TYPE_INT:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
            break;
         case GLSL_TYPE_BOOL:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
            break;
         default:
            assert(!"not reached");
            c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
            break;
         }
         c->prog_data.param[slot] = &values[i];
      }

      /* Zero-fill the unused tail channels of the vec4 slot. */
      for (unsigned int i = type->vector_elements; i < 4; i++) {
         c->prog_data.param_convert[this->uniforms * 4 + i] =
            PARAM_CONVERT_ZERO;
         c->prog_data.param[this->uniforms * 4 + i] = NULL;
      }

      this->uniform_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;


   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}

/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
                                            (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);
         /* A repeated swizzle channel marks the end of the unique ones. */
         if (swiz == last_swiz)
            break;
         last_swiz = swiz;

         c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
         c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
         this->uniform_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}

/** Look up the register previously allocated for \p var, or NULL. */
dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
   return (dst_reg *)hash_table_find(this->variable_ht, var);
}

/**
 * Emit instructions that leave the flag register set according to the
 * boolean rvalue \p ir, folding comparison expressions directly into the
 * conditional-mod where possible.
 */
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* Booleans are stored as 0/1; !a <=> (a & 1) == 0. */
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
         } else {
            /* Pre-gen6, a float MOV to null sets the flag from the value. */
            inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         /* Fold the comparison itself into the conditional mod. */
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      default:
         assert(!"not reached");
         break;
      }
      return;
   }

   /* Not a recognized expression: evaluate it as a value and test bit 0. */
   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
                                    this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}

/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;
      dst_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         /* Only the all_equal/any_nequal cases may take vector operands. */
         assert(expr->operands[i]->type->is_scalar() ||
                expr->operation == ir_binop_any_nequal ||
                expr->operation == ir_binop_all_equal);

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* if (!a) <=> if (a == 0) */
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         return;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_or:
         /* OR the operands into a temp, then IF on the temp being nonzero. */
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_OR, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_and:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_AND, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_f2b:
         /* NOTE(review): compares a float operand against integer 0 with a
          * float null dst -- presumably relies on dst type for the compare;
          * confirm against the gen6 IF instruction semantics.
          */
         inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_i2b:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_nequal:
         /* Scalar comparisons fold directly into the IF. */
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);

         return;

      case ir_binop_all_equal:
         /* Vector ==: CMP per channel, then IF predicated on all four
          * channels having matched (ALL4H).
          */
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
         return;

      case ir_binop_any_nequal:
         /* Vector !=: IF fires if any channel differed (ANY4H). */
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         return;

      default:
         assert(!"not reached");
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;
      }
      return;
   }

   /* Plain boolean condition: evaluate and IF on nonzero. */
   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
                                 this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}

/**
 * Allocate (or bind) storage for a variable declaration, keyed in
 * variable_ht for later dereference visits.
 */
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      /* Inputs live in the fixed attribute file at the linked location. */
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      /* Record each vec4 of the output so the URB writes can find it;
       * outputs are always written as floats.
       */
      for (int i = 0; i < type_size(ir->type); i++) {
         output_reg[ir->location + i] = *reg;
         output_reg[ir->location + i].reg_offset = i;
         output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
      }
      break;

   case ir_var_auto:
   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      /* "gl_" uniforms are driver-supplied builtin state. */
      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }
      break;

   default:
      assert(!"not reached");
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}

/**
 * Lower a bounded IR loop to DO/WHILE with explicit counter init,
 * compare-and-break, and increment.  Currently bails via fail().
 */
void
vec4_visitor::visit(ir_loop *ir)
{

   ir_dereference_variable *counter = NULL;

   /* Loop support is not implemented yet in the vec4 backend. */
   fail("not yet\n");

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL)
      counter = new(ir) ir_dereference_variable(ir->counter);

   if (ir->from != NULL) {
      assert(ir->counter != NULL);

      /* counter = from; */
      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);

      a->accept(this);
      delete a;
   }

   emit(BRW_OPCODE_DO);

   if (ir->to) {
      /* if (counter cmp to) break; */
      ir_expression *e =
         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
                               counter, ir->to);
      ir_if *if_stmt = new(ir) ir_if(e);

      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);

      if_stmt->then_instructions.push_tail(brk);

      if_stmt->accept(this);

      delete if_stmt;
      delete e;
      delete brk;
   }

   visit_instructions(&ir->body_instructions);

   if (ir->increment) {
      /* counter = counter + increment; */
      ir_expression *e =
         new(ir) ir_expression(ir_binop_add, counter->type,
                               counter, ir->increment);

      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);

      a->accept(this);
      delete a;
      delete e;
   }

   emit(BRW_OPCODE_WHILE);
}

/** break/continue map directly to the hardware flow-control opcodes. */
void
vec4_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}


/* Function signatures are never visited directly; see visit(ir_function). */
void
vec4_visitor::visit(ir_function_signature *ir)
{
   assert(0);
   (void)ir;
}

void
vec4_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined.
764 */ 765 if (strcmp(ir->name, "main") == 0) { 766 const ir_function_signature *sig; 767 exec_list empty; 768 769 sig = ir->matching_signature(&empty); 770 771 assert(sig); 772 773 visit_instructions(&sig->body); 774 } 775} 776 777GLboolean 778vec4_visitor::try_emit_sat(ir_expression *ir) 779{ 780 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 781 if (!sat_src) 782 return false; 783 784 sat_src->accept(this); 785 src_reg src = this->result; 786 787 this->result = src_reg(this, ir->type); 788 vec4_instruction *inst; 789 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); 790 inst->saturate = true; 791 792 return true; 793} 794 795void 796vec4_visitor::emit_bool_comparison(unsigned int op, 797 dst_reg dst, src_reg src0, src_reg src1) 798{ 799 /* original gen4 does destination conversion before comparison. */ 800 if (intel->gen < 5) 801 dst.type = src0.type; 802 803 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); 804 inst->conditional_mod = brw_conditional_for_comparison(op); 805 806 dst.type = BRW_REGISTER_TYPE_D; 807 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); 808} 809 810void 811vec4_visitor::visit(ir_expression *ir) 812{ 813 unsigned int operand; 814 src_reg op[Elements(ir->operands)]; 815 src_reg result_src; 816 dst_reg result_dst; 817 vec4_instruction *inst; 818 819 if (try_emit_sat(ir)) 820 return; 821 822 for (operand = 0; operand < ir->get_num_operands(); operand++) { 823 this->result.file = BAD_FILE; 824 ir->operands[operand]->accept(this); 825 if (this->result.file == BAD_FILE) { 826 printf("Failed to get tree for expression operand:\n"); 827 ir->operands[operand]->print(); 828 exit(1); 829 } 830 op[operand] = this->result; 831 832 /* Matrix expression operands should have been broken down to vector 833 * operations already. 
834 */ 835 assert(!ir->operands[operand]->type->is_matrix()); 836 } 837 838 int vector_elements = ir->operands[0]->type->vector_elements; 839 if (ir->operands[1]) { 840 vector_elements = MAX2(vector_elements, 841 ir->operands[1]->type->vector_elements); 842 } 843 844 this->result.file = BAD_FILE; 845 846 /* Storage for our result. Ideally for an assignment we'd be using 847 * the actual storage for the result here, instead. 848 */ 849 result_src = src_reg(this, ir->type); 850 /* convenience for the emit functions below. */ 851 result_dst = dst_reg(result_src); 852 /* If nothing special happens, this is the result. */ 853 this->result = result_src; 854 /* Limit writes to the channels that will be used by result_src later. 855 * This does limit this temp's use as a temporary for multi-instruction 856 * sequences. 857 */ 858 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 859 860 switch (ir->operation) { 861 case ir_unop_logic_not: 862 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 863 * ones complement of the whole register, not just bit 0. 
864 */ 865 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); 866 break; 867 case ir_unop_neg: 868 op[0].negate = !op[0].negate; 869 this->result = op[0]; 870 break; 871 case ir_unop_abs: 872 op[0].abs = true; 873 op[0].negate = false; 874 this->result = op[0]; 875 break; 876 877 case ir_unop_sign: 878 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); 879 880 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 881 inst->conditional_mod = BRW_CONDITIONAL_G; 882 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); 883 inst->predicate = BRW_PREDICATE_NORMAL; 884 885 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 886 inst->conditional_mod = BRW_CONDITIONAL_L; 887 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); 888 inst->predicate = BRW_PREDICATE_NORMAL; 889 890 break; 891 892 case ir_unop_rcp: 893 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); 894 break; 895 896 case ir_unop_exp2: 897 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); 898 break; 899 case ir_unop_log2: 900 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); 901 break; 902 case ir_unop_exp: 903 case ir_unop_log: 904 assert(!"not reached: should be handled by ir_explog_to_explog2"); 905 break; 906 case ir_unop_sin: 907 case ir_unop_sin_reduced: 908 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); 909 break; 910 case ir_unop_cos: 911 case ir_unop_cos_reduced: 912 emit_math(SHADER_OPCODE_COS, result_dst, op[0]); 913 break; 914 915 case ir_unop_dFdx: 916 case ir_unop_dFdy: 917 assert(!"derivatives not valid in vertex shader"); 918 break; 919 920 case ir_unop_noise: 921 assert(!"not reached: should be handled by lower_noise"); 922 break; 923 924 case ir_binop_add: 925 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); 926 break; 927 case ir_binop_sub: 928 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 929 break; 930 931 case ir_binop_mul: 932 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); 933 break; 934 case ir_binop_div: 935 assert(!"not reached: 
should be handled by ir_div_to_mul_rcp"); 936 case ir_binop_mod: 937 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 938 break; 939 940 case ir_binop_less: 941 case ir_binop_greater: 942 case ir_binop_lequal: 943 case ir_binop_gequal: 944 case ir_binop_equal: 945 case ir_binop_nequal: { 946 dst_reg temp = result_dst; 947 /* original gen4 does implicit conversion before comparison. */ 948 if (intel->gen < 5) 949 temp.type = op[0].type; 950 951 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 952 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 953 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); 954 break; 955 } 956 957 case ir_binop_all_equal: 958 /* "==" operator producing a scalar boolean. */ 959 if (ir->operands[0]->type->is_vector() || 960 ir->operands[1]->type->is_vector()) { 961 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 962 inst->conditional_mod = BRW_CONDITIONAL_Z; 963 964 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 965 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 966 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 967 } else { 968 dst_reg temp = result_dst; 969 /* original gen4 does implicit conversion before comparison. */ 970 if (intel->gen < 5) 971 temp.type = op[0].type; 972 973 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 974 inst->conditional_mod = BRW_CONDITIONAL_NZ; 975 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 976 } 977 break; 978 case ir_binop_any_nequal: 979 /* "!=" operator producing a scalar boolean. 
*/ 980 if (ir->operands[0]->type->is_vector() || 981 ir->operands[1]->type->is_vector()) { 982 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 983 inst->conditional_mod = BRW_CONDITIONAL_NZ; 984 985 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 986 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 987 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 988 } else { 989 dst_reg temp = result_dst; 990 /* original gen4 does implicit conversion before comparison. */ 991 if (intel->gen < 5) 992 temp.type = op[0].type; 993 994 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 995 inst->conditional_mod = BRW_CONDITIONAL_NZ; 996 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 997 } 998 break; 999 1000 case ir_unop_any: 1001 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 1002 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1003 1004 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 1005 1006 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 1007 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 1008 break; 1009 1010 case ir_binop_logic_xor: 1011 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1012 break; 1013 1014 case ir_binop_logic_or: 1015 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1016 break; 1017 1018 case ir_binop_logic_and: 1019 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1020 break; 1021 1022 case ir_binop_dot: 1023 assert(ir->operands[0]->type->is_vector()); 1024 assert(ir->operands[0]->type == ir->operands[1]->type); 1025 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); 1026 break; 1027 1028 case ir_unop_sqrt: 1029 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); 1030 break; 1031 case ir_unop_rsq: 1032 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); 1033 break; 1034 case ir_unop_i2f: 1035 case ir_unop_i2u: 1036 case ir_unop_u2i: 1037 case ir_unop_u2f: 1038 case ir_unop_b2f: 1039 case ir_unop_b2i: 1040 case ir_unop_f2i: 1041 emit(BRW_OPCODE_MOV, result_dst, op[0]); 1042 break; 1043 case ir_unop_f2b: 
1044 case ir_unop_i2b: { 1045 dst_reg temp = result_dst; 1046 /* original gen4 does implicit conversion before comparison. */ 1047 if (intel->gen < 5) 1048 temp.type = op[0].type; 1049 1050 inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); 1051 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1052 inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); 1053 break; 1054 } 1055 1056 case ir_unop_trunc: 1057 emit(BRW_OPCODE_RNDZ, result_dst, op[0]); 1058 break; 1059 case ir_unop_ceil: 1060 op[0].negate = !op[0].negate; 1061 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1062 this->result.negate = true; 1063 break; 1064 case ir_unop_floor: 1065 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1066 break; 1067 case ir_unop_fract: 1068 inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); 1069 break; 1070 case ir_unop_round_even: 1071 emit(BRW_OPCODE_RNDE, result_dst, op[0]); 1072 break; 1073 1074 case ir_binop_min: 1075 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1076 inst->conditional_mod = BRW_CONDITIONAL_L; 1077 1078 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1079 inst->predicate = BRW_PREDICATE_NORMAL; 1080 break; 1081 case ir_binop_max: 1082 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1083 inst->conditional_mod = BRW_CONDITIONAL_G; 1084 1085 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1086 inst->predicate = BRW_PREDICATE_NORMAL; 1087 break; 1088 1089 case ir_binop_pow: 1090 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); 1091 break; 1092 1093 case ir_unop_bit_not: 1094 inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); 1095 break; 1096 case ir_binop_bit_and: 1097 inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1098 break; 1099 case ir_binop_bit_xor: 1100 inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1101 break; 1102 case ir_binop_bit_or: 1103 inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1104 break; 1105 1106 case ir_binop_lshift: 1107 case ir_binop_rshift: 1108 assert(!"GLSL 1.30 
features unsupported"); 1109 break; 1110 1111 case ir_quadop_vector: 1112 assert(!"not reached: should be handled by lower_quadop_vector"); 1113 break; 1114 } 1115} 1116 1117 1118void 1119vec4_visitor::visit(ir_swizzle *ir) 1120{ 1121 src_reg src; 1122 int i = 0; 1123 int swizzle[4]; 1124 1125 /* Note that this is only swizzles in expressions, not those on the left 1126 * hand side of an assignment, which do write masking. See ir_assignment 1127 * for that. 1128 */ 1129 1130 ir->val->accept(this); 1131 src = this->result; 1132 assert(src.file != BAD_FILE); 1133 1134 for (i = 0; i < ir->type->vector_elements; i++) { 1135 switch (i) { 1136 case 0: 1137 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); 1138 break; 1139 case 1: 1140 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); 1141 break; 1142 case 2: 1143 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); 1144 break; 1145 case 3: 1146 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); 1147 break; 1148 } 1149 } 1150 for (; i < 4; i++) { 1151 /* Replicate the last channel out. 
 */
      swizzle[i] = swizzle[ir->type->vector_elements - 1];
   }

   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}

/**
 * Produce an rvalue for a variable reference from its allocated storage,
 * with a size-appropriate read swizzle for scalar/vector/matrix types.
 */
void
vec4_visitor::visit(ir_dereference_variable *ir)
{
   const struct glsl_type *type = ir->type;
   dst_reg *reg = variable_storage(ir->var);

   if (!reg) {
      fail("Failed to find variable storage for %s\n", ir->var->name);
      this->result = src_reg(brw_null_reg());
      return;
   }

   this->result = src_reg(*reg);

   if (type->is_scalar() || type->is_vector() || type->is_matrix())
      this->result.swizzle = swizzle_for_size(type->vector_elements);
}

/**
 * Produce an rvalue for an array dereference: constant indices fold into
 * reg_offset; variable indices become (or chain onto) a reladdr register.
 */
void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      /* Compile-time constant index: fold into the register offset. */
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         /* Scale the index by the element size in vec4 registers. */
         index_reg = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_MUL, dst_reg(index_reg),
              this->result, src_reg(element_size));
      }

      if (src.reladdr) {
         /* Nested variable indexing: accumulate onto the existing reladdr. */
         src_reg temp = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);

         index_reg = temp;
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out.
    */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}

/**
 * Produce an rvalue for a struct field access by summing the sizes of the
 * fields preceding it into a reg_offset.
 */
void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;
   this->result.type = brw_type_for_base_type(ir->type);

   this->result.reg_offset += offset;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series conditional moves
    * before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
1277 */ 1278 ir->accept(v); 1279 return dst_reg(v->result); 1280} 1281 1282void 1283vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, 1284 const struct glsl_type *type, bool predicated) 1285{ 1286 if (type->base_type == GLSL_TYPE_STRUCT) { 1287 for (unsigned int i = 0; i < type->length; i++) { 1288 emit_block_move(dst, src, type->fields.structure[i].type, predicated); 1289 } 1290 return; 1291 } 1292 1293 if (type->is_array()) { 1294 for (unsigned int i = 0; i < type->length; i++) { 1295 emit_block_move(dst, src, type->fields.array, predicated); 1296 } 1297 return; 1298 } 1299 1300 if (type->is_matrix()) { 1301 const struct glsl_type *vec_type; 1302 1303 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 1304 type->vector_elements, 1); 1305 1306 for (int i = 0; i < type->matrix_columns; i++) { 1307 emit_block_move(dst, src, vec_type, predicated); 1308 } 1309 return; 1310 } 1311 1312 assert(type->is_scalar() || type->is_vector()); 1313 1314 dst->type = brw_type_for_base_type(type); 1315 src->type = dst->type; 1316 1317 dst->writemask = (1 << type->vector_elements) - 1; 1318 1319 /* Do we need to worry about swizzling a swizzle? */ 1320 assert(src->swizzle = BRW_SWIZZLE_NOOP); 1321 src->swizzle = swizzle_for_size(type->vector_elements); 1322 1323 vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src); 1324 if (predicated) 1325 inst->predicate = BRW_PREDICATE_NORMAL; 1326 1327 dst->reg_offset++; 1328 src->reg_offset++; 1329} 1330 1331void 1332vec4_visitor::visit(ir_assignment *ir) 1333{ 1334 dst_reg dst = get_assignment_lhs(ir->lhs, this); 1335 1336 if (!ir->lhs->type->is_scalar() && 1337 !ir->lhs->type->is_vector()) { 1338 ir->rhs->accept(this); 1339 src_reg src = this->result; 1340 1341 if (ir->condition) { 1342 emit_bool_to_cond_code(ir->condition); 1343 } 1344 1345 emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL); 1346 return; 1347 } 1348 1349 /* Now we're down to just a scalar/vector with writemasks. 
    */
   /* NOTE(review): this outer 'i' is shadowed by the loop-local 'i's in
    * the two scans below; only the final emission loop uses it.
    */
   int i;

   ir->rhs->accept(this);
   src_reg src = this->result;

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
          ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   /* Find the source channel read by the first enabled destination
    * channel; disabled channels will replicate it below.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
         break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
         swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
         swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                              swizzles[2], swizzles[3]);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   /* One MOV per register of the LHS type, predicated if conditional. */
   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
         inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}

/**
 * Emits MOVs of immediate values to write the constant \p ir into *dst,
 * advancing dst->reg_offset past each vec4 written.  Recurses through
 * struct fields, array elements, and matrix columns.
 */
void
vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
{
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      foreach_list(node, &ir->components) {
         ir_constant *field_value = (ir_constant *)node;

         emit_constant_values(dst, field_value);
      }
      return;
   }

   if (ir->type->is_array()) {
      for (unsigned int i = 0; i < ir->type->length; i++) {
         emit_constant_values(dst, ir->array_elements[i]);
      }
      return;
   }

   if (ir->type->is_matrix()) {
      /* One float immediate MOV per matrix component, column-major. */
      for (int i = 0; i < ir->type->matrix_columns; i++) {
         for (int j = 0; j < ir->type->vector_elements; j++)
         {
            dst->writemask = 1 << j;
            dst->type = BRW_REGISTER_TYPE_F;

            emit(BRW_OPCODE_MOV, *dst,
                 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
         }
         dst->reg_offset++;
      }
      return;
   }

   /* Scalar/vector constant: one immediate MOV per component, with the
    * immediate type chosen from the GLSL base type.
    */
   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst->writemask = 1 << i;
      dst->type = brw_type_for_base_type(ir->type);

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
         break;
      case GLSL_TYPE_INT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
         break;
      case GLSL_TYPE_UINT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
         break;
      case GLSL_TYPE_BOOL:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
         break;
      }
   }
   dst->reg_offset++;
}

/* Materialize a constant into a fresh temporary and expose it as the
 * expression result.
 */
void
vec4_visitor::visit(ir_constant *ir)
{
   dst_reg dst = dst_reg(this, ir->type);
   this->result = src_reg(dst);

   emit_constant_values(&dst, ir);
}

/* The following IR node types are never expected to reach this backend. */
void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_texture *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
1496 */ 1497 this->base_ir = ir->condition; 1498 1499 if (intel->gen == 6) { 1500 emit_if_gen6(ir); 1501 } else { 1502 emit_bool_to_cond_code(ir->condition); 1503 vec4_instruction *inst = emit(BRW_OPCODE_IF); 1504 inst->predicate = BRW_PREDICATE_NORMAL; 1505 } 1506 1507 visit_instructions(&ir->then_instructions); 1508 1509 if (!ir->else_instructions.is_empty()) { 1510 this->base_ir = ir->condition; 1511 emit(BRW_OPCODE_ELSE); 1512 1513 visit_instructions(&ir->else_instructions); 1514 } 1515 1516 this->base_ir = ir->condition; 1517 emit(BRW_OPCODE_ENDIF); 1518} 1519 1520int 1521vec4_visitor::emit_vue_header_gen4(int header_mrf) 1522{ 1523 /* Get the position */ 1524 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); 1525 1526 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ 1527 dst_reg ndc = dst_reg(this, glsl_type::vec4_type); 1528 1529 current_annotation = "NDC"; 1530 dst_reg ndc_w = ndc; 1531 ndc_w.writemask = WRITEMASK_W; 1532 src_reg pos_w = pos; 1533 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); 1534 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); 1535 1536 dst_reg ndc_xyz = ndc; 1537 ndc_xyz.writemask = WRITEMASK_XYZ; 1538 1539 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); 1540 1541 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || 1542 c->key.nr_userclip || brw->has_negative_rhw_bug) { 1543 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); 1544 GLuint i; 1545 1546 emit(BRW_OPCODE_MOV, header1, 0u); 1547 1548 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1549 assert(!"finishme: psiz"); 1550 src_reg psiz; 1551 1552 header1.writemask = WRITEMASK_W; 1553 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); 1554 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); 1555 } 1556 1557 for (i = 0; i < c->key.nr_userclip; i++) { 1558 vec4_instruction *inst; 1559 1560 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), 1561 pos, src_reg(c->userplane[i])); 1562 
inst->conditional_mod = BRW_CONDITIONAL_L; 1563 1564 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); 1565 inst->predicate = BRW_PREDICATE_NORMAL; 1566 } 1567 1568 /* i965 clipping workaround: 1569 * 1) Test for -ve rhw 1570 * 2) If set, 1571 * set ndc = (0,0,0,0) 1572 * set ucp[6] = 1 1573 * 1574 * Later, clipping will detect ucp[6] and ensure the primitive is 1575 * clipped against all fixed planes. 1576 */ 1577 if (brw->has_negative_rhw_bug) { 1578#if 0 1579 /* FINISHME */ 1580 brw_CMP(p, 1581 vec8(brw_null_reg()), 1582 BRW_CONDITIONAL_L, 1583 brw_swizzle1(ndc, 3), 1584 brw_imm_f(0)); 1585 1586 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); 1587 brw_MOV(p, ndc, brw_imm_f(0)); 1588 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1589#endif 1590 } 1591 1592 header1.writemask = WRITEMASK_XYZW; 1593 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); 1594 } else { 1595 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), 1596 BRW_REGISTER_TYPE_UD), 0u); 1597 } 1598 1599 if (intel->gen == 5) { 1600 /* There are 20 DWs (D0-D19) in VUE header on Ironlake: 1601 * dword 0-3 (m1) of the header is indices, point width, clip flags. 1602 * dword 4-7 (m2) is the ndc position (set above) 1603 * dword 8-11 (m3) of the vertex header is the 4D space position 1604 * dword 12-19 (m4,m5) of the vertex header is the user clip distance. 1605 * m6 is a pad so that the vertex element data is aligned 1606 * m7 is the first vertex data we fill. 1607 */ 1608 current_annotation = "NDC"; 1609 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1610 1611 current_annotation = "gl_Position"; 1612 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1613 1614 /* user clip distance. */ 1615 header_mrf += 2; 1616 1617 /* Pad so that vertex element data is aligned. */ 1618 header_mrf++; 1619 } else { 1620 /* There are 8 dwords in VUE header pre-Ironlake: 1621 * dword 0-3 (m1) is indices, point width, clip flags. 
1622 * dword 4-7 (m2) is ndc position (set above) 1623 * 1624 * dword 8-11 (m3) is the first vertex data. 1625 */ 1626 current_annotation = "NDC"; 1627 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1628 1629 current_annotation = "gl_Position"; 1630 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1631 } 1632 1633 return header_mrf; 1634} 1635 1636int 1637vec4_visitor::emit_vue_header_gen6(int header_mrf) 1638{ 1639 struct brw_reg reg; 1640 1641 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: 1642 * dword 0-3 (m2) of the header is indices, point width, clip flags. 1643 * dword 4-7 (m3) is the 4D space position 1644 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if 1645 * enabled. 1646 * 1647 * m4 or 6 is the first vertex element data we fill. 1648 */ 1649 1650 current_annotation = "indices, point width, clip flags"; 1651 reg = brw_message_reg(header_mrf++); 1652 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); 1653 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1654 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), 1655 src_reg(output_reg[VERT_RESULT_PSIZ])); 1656 } 1657 1658 current_annotation = "gl_Position"; 1659 emit(BRW_OPCODE_MOV, 1660 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); 1661 1662 current_annotation = "user clip distances"; 1663 if (c->key.nr_userclip) { 1664 for (int i = 0; i < c->key.nr_userclip; i++) { 1665 struct brw_reg m; 1666 if (i < 4) 1667 m = brw_message_reg(header_mrf); 1668 else 1669 m = brw_message_reg(header_mrf + 1); 1670 1671 emit(BRW_OPCODE_DP4, 1672 dst_reg(brw_writemask(m, 1 << (i & 3))), 1673 src_reg(c->userplane[i])); 1674 } 1675 header_mrf += 2; 1676 } 1677 1678 current_annotation = NULL; 1679 1680 return header_mrf; 1681} 1682 1683static int 1684align_interleaved_urb_mlen(struct brw_context *brw, int mlen) 1685{ 1686 struct intel_context *intel = &brw->intel; 1687 1688 if (intel->gen >= 6) { 
      /* URB data written (does not include the message header reg) must
       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
       * section 5.4.3.2.2: URB_INTERLEAVED.
       *
       * URB entries are allocated on a multiple of 1024 bits, so an
       * extra 128 bits written here to make the end align to 256 is
       * no problem.
       *
       * (mlen includes the header reg, so the data payload is mlen - 1;
       * bumping an even mlen to odd makes the payload even.)
       */
      if ((mlen % 2) != 1)
         mlen++;
   }

   return mlen;
}

/**
 * Generates the VUE payload plus the 1 or 2 URB write instructions to
 * complete the VS thread.
 *
 * The VUE layout is documented in Volume 2a.
 */
void
vec4_visitor::emit_urb_writes()
{
   /* MRF 0 is reserved for the debugger, so start with message header
    * in MRF 1.
    */
   int base_mrf = 1;
   int mrf = base_mrf;
   int urb_entry_size;
   /* Bitfield of outputs that still need to be written in a second URB
    * write; cleared as the first write consumes them.
    */
   uint64_t outputs_remaining = c->prog_data.outputs_written;
   /* In the process of generating our URB write message contents, we
    * may need to unspill a register or load from an array.  Those
    * reads would use MRFs 14-15.
    */
   int max_usable_mrf = 13;

   /* FINISHME: edgeflag */

   /* First mrf is the g0-based message header containing URB handles and such,
    * which is implied in VS_OPCODE_URB_WRITE.
    */
   mrf++;

   if (intel->gen >= 6) {
      mrf = emit_vue_header_gen6(mrf);
   } else {
      mrf = emit_vue_header_gen4(mrf);
   }

   /* Set up the VUE data for the first URB write */
   int attr;
   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
         continue;

      outputs_remaining &= ~BITFIELD64_BIT(attr);

      /* This is set up in the VUE header. */
      if (attr == VERT_RESULT_HPOS)
         continue;

      /* This is loaded into the VUE header, and thus doesn't occupy
       * an attribute slot.
       */
      if (attr == VERT_RESULT_PSIZ)
         continue;

      emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));

      /* If this was MRF 15, we can't fit anything more into this URB
       * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
       * even-numbered amount of URB write data, which will meet
       * gen6's requirements for length alignment.
       */
      if (mrf > max_usable_mrf) {
         attr++;
         break;
      }
   }

   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
   inst->base_mrf = base_mrf;
   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
   /* End the thread here only if everything fit in one write. */
   inst->eot = !outputs_remaining;

   urb_entry_size = mrf - base_mrf;

   /* Optional second URB write */
   if (outputs_remaining) {
      mrf = base_mrf + 1;

      for (; attr < VERT_RESULT_MAX; attr++) {
         if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
            continue;

         assert(mrf < max_usable_mrf);

         emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
      }

      inst = emit(VS_OPCODE_URB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
      inst->eot = true;
      /* URB destination offset.  In the previous write, we got MRFs
       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
       * URB row increments, and each of our MRFs is half of one of
       * those, since we're doing interleaved writes.
       */
      inst->offset = (max_usable_mrf - base_mrf) / 2;

      urb_entry_size += mrf - base_mrf;
   }

   if (intel->gen == 6)
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
   else
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
}

/**
 * Returns a src_reg holding the scratch-space offset (in message header
 * units) for the given register offset, emitting address arithmetic
 * before \p inst when a reladdr (variable index) is involved.
 */
src_reg
vec4_visitor::get_scratch_offset(vec4_instruction *inst,
                                 src_reg *reladdr, int reg_offset)
{
   /* Because we store the values to scratch interleaved like our
    * vertex data, we need to scale the vec4 index by 2.
    */
   int message_header_scale = 2;

   /* Pre-gen6, the message header uses byte offsets instead of vec4
    * (16-byte) offset units.
    */
   if (intel->gen < 6)
      message_header_scale *= 16;

   if (reladdr) {
      src_reg index = src_reg(this, glsl_type::int_type);

      vec4_instruction *add = emit(BRW_OPCODE_ADD,
                                   dst_reg(index),
                                   *reladdr,
                                   src_reg(reg_offset));
      /* Move our new instruction from the tail to its correct place. */
      add->remove();
      inst->insert_before(add);

      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
                                   index, src_reg(message_header_scale));
      mul->remove();
      inst->insert_before(mul);

      return index;
   } else {
      return src_reg(reg_offset * message_header_scale);
   }
}

/**
 * Emits an instruction before @inst to load the value named by @orig_src
 * from scratch space at @base_offset to @temp.
 */
void
vec4_visitor::emit_scratch_read(vec4_instruction *inst,
                                dst_reg temp, src_reg orig_src,
                                int base_offset)
{
   int reg_offset = base_offset + orig_src.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);

   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
                                              temp, index);

   /* Scratch reads use MRF 14 for their single message header reg. */
   scratch_read_inst->base_mrf = 14;
   scratch_read_inst->mlen = 1;
   /* Move our instruction from the tail to its correct place. */
   scratch_read_inst->remove();
   inst->insert_before(scratch_read_inst);
}

/**
 * Emits an instruction after @inst to store the value to be written
 * to @orig_dst to scratch space at @base_offset, from @temp.
 */
void
vec4_visitor::emit_scratch_write(vec4_instruction *inst,
                                 src_reg temp, dst_reg orig_dst,
                                 int base_offset)
{
   int reg_offset = base_offset + orig_dst.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);

   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
                                       orig_dst.writemask));
   /* Scratch writes use MRFs 13-14: header plus one data register. */
   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
                                               dst, temp, index);
   scratch_write_inst->base_mrf = 13;
   scratch_write_inst->mlen = 2;
   /* Preserve the original instruction's predication so a conditional
    * write stays conditional.
    */
   scratch_write_inst->predicate = inst->predicate;
   /* Move our instruction from the tail to its correct place. */
   scratch_write_inst->remove();
   inst->insert_after(scratch_write_inst);
}

/**
 * We can't generally support array access in GRF space, because a
 * single instruction's destination can only span 2 contiguous
 * registers.  So, we send all GRF arrays that get variable index
 * access to scratch space.
 */
void
vec4_visitor::move_grf_array_access_to_scratch()
{
   int scratch_loc[this->virtual_grf_count];

   for (int i = 0; i < this->virtual_grf_count; i++) {
      scratch_loc[i] = -1;
   }

   /* First, calculate the set of virtual GRFs that need to be punted
    * to scratch due to having any array access on them, and where in
    * scratch.
    */
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      if (inst->dst.file == GRF && inst->dst.reladdr &&
          scratch_loc[inst->dst.reg] == -1) {
         scratch_loc[inst->dst.reg] = c->last_scratch;
         c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
      }

      for (int i = 0 ; i < 3; i++) {
         src_reg *src = &inst->src[i];

         if (src->file == GRF && src->reladdr &&
             scratch_loc[src->reg] == -1) {
            scratch_loc[src->reg] = c->last_scratch;
            c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
         }
      }
   }

   /* Now, for anything that will be accessed through scratch, rewrite
    * it to load/store.  Note that this is a _safe list walk, because
    * we may generate a new scratch_write instruction after the one
    * we're processing.
    */
   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      /* Set up the annotation tracking for new generated instructions.
*/ 1941 base_ir = inst->ir; 1942 current_annotation = inst->annotation; 1943 1944 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { 1945 src_reg temp = src_reg(this, glsl_type::vec4_type); 1946 1947 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]); 1948 1949 inst->dst.file = temp.file; 1950 inst->dst.reg = temp.reg; 1951 inst->dst.reg_offset = temp.reg_offset; 1952 inst->dst.reladdr = NULL; 1953 } 1954 1955 for (int i = 0 ; i < 3; i++) { 1956 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) 1957 continue; 1958 1959 dst_reg temp = dst_reg(this, glsl_type::vec4_type); 1960 1961 emit_scratch_read(inst, temp, inst->src[i], 1962 scratch_loc[inst->src[i].reg]); 1963 1964 inst->src[i].file = temp.file; 1965 inst->src[i].reg = temp.reg; 1966 inst->src[i].reg_offset = temp.reg_offset; 1967 inst->src[i].reladdr = NULL; 1968 } 1969 } 1970} 1971 1972 1973vec4_visitor::vec4_visitor(struct brw_vs_compile *c, 1974 struct gl_shader_program *prog, 1975 struct brw_shader *shader) 1976{ 1977 this->c = c; 1978 this->p = &c->func; 1979 this->brw = p->brw; 1980 this->intel = &brw->intel; 1981 this->ctx = &intel->ctx; 1982 this->prog = prog; 1983 this->shader = shader; 1984 1985 this->mem_ctx = ralloc_context(NULL); 1986 this->failed = false; 1987 1988 this->base_ir = NULL; 1989 this->current_annotation = NULL; 1990 1991 this->c = c; 1992 this->vp = brw->vertex_program; /* FINISHME: change for precompile */ 1993 this->prog_data = &c->prog_data; 1994 1995 this->variable_ht = hash_table_ctor(0, 1996 hash_table_pointer_hash, 1997 hash_table_pointer_compare); 1998 1999 this->virtual_grf_sizes = NULL; 2000 this->virtual_grf_count = 0; 2001 this->virtual_grf_array_size = 0; 2002 2003 this->uniforms = 0; 2004 2005 this->variable_ht = hash_table_ctor(0, 2006 hash_table_pointer_hash, 2007 hash_table_pointer_compare); 2008} 2009 2010vec4_visitor::~vec4_visitor() 2011{ 2012 hash_table_dtor(this->variable_ht); 2013} 2014 2015 2016void 
2017vec4_visitor::fail(const char *format, ...) 2018{ 2019 va_list va; 2020 char *msg; 2021 2022 if (failed) 2023 return; 2024 2025 failed = true; 2026 2027 va_start(va, format); 2028 msg = ralloc_vasprintf(mem_ctx, format, va); 2029 va_end(va); 2030 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); 2031 2032 this->fail_msg = msg; 2033 2034 if (INTEL_DEBUG & DEBUG_VS) { 2035 fprintf(stderr, "%s", msg); 2036 } 2037} 2038 2039} /* namespace brw */ 2040