// brw_vec4_visitor.cpp revision 31ef2e3ec2f5837eea0899b4bda5ea15e335a6a2
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22 */ 23 24#include "brw_vec4.h" 25extern "C" { 26#include "main/macros.h" 27#include "program/prog_parameter.h" 28} 29 30namespace brw { 31 32src_reg::src_reg(dst_reg reg) 33{ 34 init(); 35 36 this->file = reg.file; 37 this->reg = reg.reg; 38 this->reg_offset = reg.reg_offset; 39 this->type = reg.type; 40 41 int swizzles[4]; 42 int next_chan = 0; 43 int last = 0; 44 45 for (int i = 0; i < 4; i++) { 46 if (!(reg.writemask & (1 << i))) 47 continue; 48 49 swizzles[next_chan++] = last = i; 50 } 51 52 for (; next_chan < 4; next_chan++) { 53 swizzles[next_chan] = last; 54 } 55 56 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 57 swizzles[2], swizzles[3]); 58} 59 60dst_reg::dst_reg(src_reg reg) 61{ 62 init(); 63 64 this->file = reg.file; 65 this->reg = reg.reg; 66 this->reg_offset = reg.reg_offset; 67 this->type = reg.type; 68 this->writemask = WRITEMASK_XYZW; 69} 70 71vec4_instruction * 72vec4_visitor::emit(enum opcode opcode, dst_reg dst, 73 src_reg src0, src_reg src1, src_reg src2) 74{ 75 vec4_instruction *inst = new(mem_ctx) vec4_instruction(); 76 77 inst->opcode = opcode; 78 inst->dst = dst; 79 inst->src[0] = src0; 80 inst->src[1] = src1; 81 inst->src[2] = src2; 82 inst->ir = this->base_ir; 83 inst->annotation = this->current_annotation; 84 85 this->instructions.push_tail(inst); 86 87 return inst; 88} 89 90 91vec4_instruction * 92vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) 93{ 94 return emit(opcode, dst, src0, src1, src_reg()); 95} 96 97vec4_instruction * 98vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) 99{ 100 assert(dst.writemask != 0); 101 return emit(opcode, dst, src0, src_reg(), src_reg()); 102} 103 104vec4_instruction * 105vec4_visitor::emit(enum opcode opcode) 106{ 107 return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); 108} 109 110void 111vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) 112{ 113 static enum opcode dot_opcodes[] = { 114 BRW_OPCODE_DP2, 
BRW_OPCODE_DP3, BRW_OPCODE_DP4 115 }; 116 117 emit(dot_opcodes[elements - 2], dst, src0, src1); 118} 119 120void 121vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) 122{ 123 /* The gen6 math instruction ignores the source modifiers -- 124 * swizzle, abs, negate, and at least some parts of the register 125 * region description. 126 */ 127 src_reg temp_src = src_reg(this, glsl_type::vec4_type); 128 emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); 129 130 emit(opcode, dst, temp_src); 131} 132 133void 134vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) 135{ 136 vec4_instruction *inst = emit(opcode, dst, src); 137 inst->base_mrf = 1; 138 inst->mlen = 1; 139} 140 141void 142vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) 143{ 144 switch (opcode) { 145 case SHADER_OPCODE_RCP: 146 case SHADER_OPCODE_RSQ: 147 case SHADER_OPCODE_SQRT: 148 case SHADER_OPCODE_EXP2: 149 case SHADER_OPCODE_LOG2: 150 case SHADER_OPCODE_SIN: 151 case SHADER_OPCODE_COS: 152 break; 153 default: 154 assert(!"not reached: bad math opcode"); 155 return; 156 } 157 158 if (intel->gen >= 6) { 159 return emit_math1_gen6(opcode, dst, src); 160 } else { 161 return emit_math1_gen4(opcode, dst, src); 162 } 163} 164 165void 166vec4_visitor::emit_math2_gen6(enum opcode opcode, 167 dst_reg dst, src_reg src0, src_reg src1) 168{ 169 src_reg expanded; 170 171 /* The gen6 math instruction ignores the source modifiers -- 172 * swizzle, abs, negate, and at least some parts of the register 173 * region description. Move the sources to temporaries to make it 174 * generally work. 
175 */ 176 177 expanded = src_reg(this, glsl_type::vec4_type); 178 emit(BRW_OPCODE_MOV, dst, src0); 179 src0 = expanded; 180 181 expanded = src_reg(this, glsl_type::vec4_type); 182 emit(BRW_OPCODE_MOV, dst, src1); 183 src1 = expanded; 184 185 emit(opcode, dst, src0, src1); 186} 187 188void 189vec4_visitor::emit_math2_gen4(enum opcode opcode, 190 dst_reg dst, src_reg src0, src_reg src1) 191{ 192 vec4_instruction *inst = emit(opcode, dst, src0, src1); 193 inst->base_mrf = 1; 194 inst->mlen = 2; 195} 196 197void 198vec4_visitor::emit_math(enum opcode opcode, 199 dst_reg dst, src_reg src0, src_reg src1) 200{ 201 assert(opcode == SHADER_OPCODE_POW); 202 203 if (intel->gen >= 6) { 204 return emit_math2_gen6(opcode, dst, src0, src1); 205 } else { 206 return emit_math2_gen4(opcode, dst, src0, src1); 207 } 208} 209 210void 211vec4_visitor::visit_instructions(const exec_list *list) 212{ 213 foreach_iter(exec_list_iterator, iter, *list) { 214 ir_instruction *ir = (ir_instruction *)iter.get(); 215 216 base_ir = ir; 217 ir->accept(this); 218 } 219} 220 221 222static int 223type_size(const struct glsl_type *type) 224{ 225 unsigned int i; 226 int size; 227 228 switch (type->base_type) { 229 case GLSL_TYPE_UINT: 230 case GLSL_TYPE_INT: 231 case GLSL_TYPE_FLOAT: 232 case GLSL_TYPE_BOOL: 233 if (type->is_matrix()) { 234 return type->matrix_columns; 235 } else { 236 /* Regardless of size of vector, it gets a vec4. This is bad 237 * packing for things like floats, but otherwise arrays become a 238 * mess. Hopefully a later pass over the code can pack scalars 239 * down if appropriate. 
240 */ 241 return 1; 242 } 243 case GLSL_TYPE_ARRAY: 244 assert(type->length > 0); 245 return type_size(type->fields.array) * type->length; 246 case GLSL_TYPE_STRUCT: 247 size = 0; 248 for (i = 0; i < type->length; i++) { 249 size += type_size(type->fields.structure[i].type); 250 } 251 return size; 252 case GLSL_TYPE_SAMPLER: 253 /* Samplers take up one slot in UNIFORMS[], but they're baked in 254 * at link time. 255 */ 256 return 1; 257 default: 258 assert(0); 259 return 0; 260 } 261} 262 263int 264vec4_visitor::virtual_grf_alloc(int size) 265{ 266 if (virtual_grf_array_size <= virtual_grf_count) { 267 if (virtual_grf_array_size == 0) 268 virtual_grf_array_size = 16; 269 else 270 virtual_grf_array_size *= 2; 271 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, 272 virtual_grf_array_size); 273 } 274 virtual_grf_sizes[virtual_grf_count] = size; 275 return virtual_grf_count++; 276} 277 278src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) 279{ 280 init(); 281 282 this->file = GRF; 283 this->reg = v->virtual_grf_alloc(type_size(type)); 284 285 if (type->is_array() || type->is_record()) { 286 this->swizzle = BRW_SWIZZLE_NOOP; 287 } else { 288 this->swizzle = swizzle_for_size(type->vector_elements); 289 } 290 291 this->type = brw_type_for_base_type(type); 292} 293 294dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) 295{ 296 init(); 297 298 this->file = GRF; 299 this->reg = v->virtual_grf_alloc(type_size(type)); 300 301 if (type->is_array() || type->is_record()) { 302 this->writemask = WRITEMASK_XYZW; 303 } else { 304 this->writemask = (1 << type->vector_elements) - 1; 305 } 306 307 this->type = brw_type_for_base_type(type); 308} 309 310/* Our support for uniforms is piggy-backed on the struct 311 * gl_fragment_program, because that's where the values actually 312 * get stored, rather than in some global gl_shader_program uniform 313 * store. 
314 */ 315int 316vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) 317{ 318 unsigned int offset = 0; 319 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; 320 321 if (type->is_matrix()) { 322 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 323 type->vector_elements, 324 1); 325 326 for (unsigned int i = 0; i < type->matrix_columns; i++) { 327 offset += setup_uniform_values(loc + offset, column); 328 } 329 330 return offset; 331 } 332 333 switch (type->base_type) { 334 case GLSL_TYPE_FLOAT: 335 case GLSL_TYPE_UINT: 336 case GLSL_TYPE_INT: 337 case GLSL_TYPE_BOOL: 338 for (unsigned int i = 0; i < type->vector_elements; i++) { 339 int slot = this->uniforms * 4 + i; 340 switch (type->base_type) { 341 case GLSL_TYPE_FLOAT: 342 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 343 break; 344 case GLSL_TYPE_UINT: 345 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; 346 break; 347 case GLSL_TYPE_INT: 348 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; 349 break; 350 case GLSL_TYPE_BOOL: 351 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; 352 break; 353 default: 354 assert(!"not reached"); 355 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 356 break; 357 } 358 c->prog_data.param[slot] = &values[i]; 359 } 360 361 for (unsigned int i = type->vector_elements; i < 4; i++) { 362 c->prog_data.param_convert[this->uniforms * 4 + i] = 363 PARAM_CONVERT_ZERO; 364 c->prog_data.param[this->uniforms * 4 + i] = NULL; 365 } 366 367 this->uniform_size[this->uniforms] = type->vector_elements; 368 this->uniforms++; 369 370 return 1; 371 372 case GLSL_TYPE_STRUCT: 373 for (unsigned int i = 0; i < type->length; i++) { 374 offset += setup_uniform_values(loc + offset, 375 type->fields.structure[i].type); 376 } 377 return offset; 378 379 case GLSL_TYPE_ARRAY: 380 for (unsigned int i = 0; i < type->length; i++) { 381 offset += setup_uniform_values(loc + offset, type->fields.array); 382 } 383 return offset; 384 385 
case GLSL_TYPE_SAMPLER: 386 /* The sampler takes up a slot, but we don't use any values from it. */ 387 return 1; 388 389 default: 390 assert(!"not reached"); 391 return 0; 392 } 393} 394 395/* Our support for builtin uniforms is even scarier than non-builtin. 396 * It sits on top of the PROG_STATE_VAR parameters that are 397 * automatically updated from GL context state. 398 */ 399void 400vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) 401{ 402 const ir_state_slot *const slots = ir->state_slots; 403 assert(ir->state_slots != NULL); 404 405 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 406 /* This state reference has already been setup by ir_to_mesa, 407 * but we'll get the same index back here. We can reference 408 * ParameterValues directly, since unlike brw_fs.cpp, we never 409 * add new state references during compile. 410 */ 411 int index = _mesa_add_state_reference(this->vp->Base.Parameters, 412 (gl_state_index *)slots[i].tokens); 413 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; 414 415 this->uniform_size[this->uniforms] = 0; 416 /* Add each of the unique swizzled channels of the element. 417 * This will end up matching the size of the glsl_type of this field. 
418 */ 419 int last_swiz = -1; 420 for (unsigned int j = 0; j < 4; j++) { 421 int swiz = GET_SWZ(slots[i].swizzle, j); 422 if (swiz == last_swiz) 423 break; 424 last_swiz = swiz; 425 426 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; 427 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; 428 this->uniform_size[this->uniforms]++; 429 } 430 this->uniforms++; 431 } 432} 433 434dst_reg * 435vec4_visitor::variable_storage(ir_variable *var) 436{ 437 return (dst_reg *)hash_table_find(this->variable_ht, var); 438} 439 440void 441vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 442{ 443 ir_expression *expr = ir->as_expression(); 444 445 if (expr) { 446 src_reg op[2]; 447 vec4_instruction *inst; 448 449 assert(expr->get_num_operands() <= 2); 450 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 451 assert(expr->operands[i]->type->is_scalar()); 452 453 expr->operands[i]->accept(this); 454 op[i] = this->result; 455 } 456 457 switch (expr->operation) { 458 case ir_unop_logic_not: 459 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); 460 inst->conditional_mod = BRW_CONDITIONAL_Z; 461 break; 462 463 case ir_binop_logic_xor: 464 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); 465 inst->conditional_mod = BRW_CONDITIONAL_NZ; 466 break; 467 468 case ir_binop_logic_or: 469 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); 470 inst->conditional_mod = BRW_CONDITIONAL_NZ; 471 break; 472 473 case ir_binop_logic_and: 474 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); 475 inst->conditional_mod = BRW_CONDITIONAL_NZ; 476 break; 477 478 case ir_unop_f2b: 479 if (intel->gen >= 6) { 480 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); 481 } else { 482 inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); 483 } 484 inst->conditional_mod = BRW_CONDITIONAL_NZ; 485 break; 486 487 case ir_unop_i2b: 488 if (intel->gen >= 6) { 489 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 490 } else { 
491 inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); 492 } 493 inst->conditional_mod = BRW_CONDITIONAL_NZ; 494 break; 495 496 case ir_binop_greater: 497 case ir_binop_gequal: 498 case ir_binop_less: 499 case ir_binop_lequal: 500 case ir_binop_equal: 501 case ir_binop_all_equal: 502 case ir_binop_nequal: 503 case ir_binop_any_nequal: 504 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 505 inst->conditional_mod = 506 brw_conditional_for_comparison(expr->operation); 507 break; 508 509 default: 510 assert(!"not reached"); 511 break; 512 } 513 return; 514 } 515 516 ir->accept(this); 517 518 if (intel->gen >= 6) { 519 vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), 520 this->result, src_reg(1)); 521 inst->conditional_mod = BRW_CONDITIONAL_NZ; 522 } else { 523 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); 524 inst->conditional_mod = BRW_CONDITIONAL_NZ; 525 } 526} 527 528/** 529 * Emit a gen6 IF statement with the comparison folded into the IF 530 * instruction. 
531 */ 532void 533vec4_visitor::emit_if_gen6(ir_if *ir) 534{ 535 ir_expression *expr = ir->condition->as_expression(); 536 537 if (expr) { 538 src_reg op[2]; 539 vec4_instruction *inst; 540 dst_reg temp; 541 542 assert(expr->get_num_operands() <= 2); 543 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 544 assert(expr->operands[i]->type->is_scalar() || 545 expr->operation == ir_binop_any_nequal || 546 expr->operation == ir_binop_all_equal); 547 548 expr->operands[i]->accept(this); 549 op[i] = this->result; 550 } 551 552 switch (expr->operation) { 553 case ir_unop_logic_not: 554 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 555 inst->conditional_mod = BRW_CONDITIONAL_Z; 556 return; 557 558 case ir_binop_logic_xor: 559 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 560 inst->conditional_mod = BRW_CONDITIONAL_NZ; 561 return; 562 563 case ir_binop_logic_or: 564 temp = dst_reg(this, glsl_type::bool_type); 565 emit(BRW_OPCODE_OR, temp, op[0], op[1]); 566 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 567 inst->conditional_mod = BRW_CONDITIONAL_NZ; 568 return; 569 570 case ir_binop_logic_and: 571 temp = dst_reg(this, glsl_type::bool_type); 572 emit(BRW_OPCODE_AND, temp, op[0], op[1]); 573 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 574 inst->conditional_mod = BRW_CONDITIONAL_NZ; 575 return; 576 577 case ir_unop_f2b: 578 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); 579 inst->conditional_mod = BRW_CONDITIONAL_NZ; 580 return; 581 582 case ir_unop_i2b: 583 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 584 inst->conditional_mod = BRW_CONDITIONAL_NZ; 585 return; 586 587 case ir_binop_greater: 588 case ir_binop_gequal: 589 case ir_binop_less: 590 case ir_binop_lequal: 591 case ir_binop_equal: 592 case ir_binop_nequal: 593 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 594 inst->conditional_mod = 595 brw_conditional_for_comparison(expr->operation); 596 
return; 597 598 case ir_binop_all_equal: 599 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); 600 inst->conditional_mod = BRW_CONDITIONAL_Z; 601 602 inst = emit(BRW_OPCODE_IF); 603 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 604 return; 605 606 case ir_binop_any_nequal: 607 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); 608 inst->conditional_mod = BRW_CONDITIONAL_NZ; 609 610 inst = emit(BRW_OPCODE_IF); 611 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 612 return; 613 614 default: 615 assert(!"not reached"); 616 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 617 inst->conditional_mod = BRW_CONDITIONAL_NZ; 618 return; 619 } 620 return; 621 } 622 623 ir->condition->accept(this); 624 625 vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), 626 this->result, src_reg(0)); 627 inst->conditional_mod = BRW_CONDITIONAL_NZ; 628} 629 630void 631vec4_visitor::visit(ir_variable *ir) 632{ 633 dst_reg *reg = NULL; 634 635 if (variable_storage(ir)) 636 return; 637 638 switch (ir->mode) { 639 case ir_var_in: 640 reg = new(mem_ctx) dst_reg(ATTR, ir->location); 641 break; 642 643 case ir_var_out: 644 reg = new(mem_ctx) dst_reg(this, ir->type); 645 646 for (int i = 0; i < type_size(ir->type); i++) { 647 output_reg[ir->location + i] = *reg; 648 output_reg[ir->location + i].reg_offset = i; 649 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F; 650 } 651 break; 652 653 case ir_var_auto: 654 case ir_var_temporary: 655 reg = new(mem_ctx) dst_reg(this, ir->type); 656 break; 657 658 case ir_var_uniform: 659 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); 660 661 if (!strncmp(ir->name, "gl_", 3)) { 662 setup_builtin_uniform_values(ir); 663 } else { 664 setup_uniform_values(ir->location, ir->type); 665 } 666 break; 667 668 default: 669 assert(!"not reached"); 670 } 671 672 reg->type = brw_type_for_base_type(ir->type); 673 hash_table_insert(this->variable_ht, reg, ir); 674} 675 676void 677vec4_visitor::visit(ir_loop *ir) 678{ 679 
ir_dereference_variable *counter = NULL; 680 681 fail("not yet\n"); 682 683 /* We don't want debugging output to print the whole body of the 684 * loop as the annotation. 685 */ 686 this->base_ir = NULL; 687 688 if (ir->counter != NULL) 689 counter = new(ir) ir_dereference_variable(ir->counter); 690 691 if (ir->from != NULL) { 692 assert(ir->counter != NULL); 693 694 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 695 696 a->accept(this); 697 delete a; 698 } 699 700 emit(BRW_OPCODE_DO); 701 702 if (ir->to) { 703 ir_expression *e = 704 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 705 counter, ir->to); 706 ir_if *if_stmt = new(ir) ir_if(e); 707 708 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 709 710 if_stmt->then_instructions.push_tail(brk); 711 712 if_stmt->accept(this); 713 714 delete if_stmt; 715 delete e; 716 delete brk; 717 } 718 719 visit_instructions(&ir->body_instructions); 720 721 if (ir->increment) { 722 ir_expression *e = 723 new(ir) ir_expression(ir_binop_add, counter->type, 724 counter, ir->increment); 725 726 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 727 728 a->accept(this); 729 delete a; 730 delete e; 731 } 732 733 emit(BRW_OPCODE_WHILE); 734} 735 736void 737vec4_visitor::visit(ir_loop_jump *ir) 738{ 739 switch (ir->mode) { 740 case ir_loop_jump::jump_break: 741 emit(BRW_OPCODE_BREAK); 742 break; 743 case ir_loop_jump::jump_continue: 744 emit(BRW_OPCODE_CONTINUE); 745 break; 746 } 747} 748 749 750void 751vec4_visitor::visit(ir_function_signature *ir) 752{ 753 assert(0); 754 (void)ir; 755} 756 757void 758vec4_visitor::visit(ir_function *ir) 759{ 760 /* Ignore function bodies other than main() -- we shouldn't see calls to 761 * them since they should all be inlined. 
762 */ 763 if (strcmp(ir->name, "main") == 0) { 764 const ir_function_signature *sig; 765 exec_list empty; 766 767 sig = ir->matching_signature(&empty); 768 769 assert(sig); 770 771 visit_instructions(&sig->body); 772 } 773} 774 775GLboolean 776vec4_visitor::try_emit_sat(ir_expression *ir) 777{ 778 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 779 if (!sat_src) 780 return false; 781 782 sat_src->accept(this); 783 src_reg src = this->result; 784 785 this->result = src_reg(this, ir->type); 786 vec4_instruction *inst; 787 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); 788 inst->saturate = true; 789 790 return true; 791} 792 793void 794vec4_visitor::emit_bool_comparison(unsigned int op, 795 dst_reg dst, src_reg src0, src_reg src1) 796{ 797 /* original gen4 does destination conversion before comparison. */ 798 if (intel->gen < 5) 799 dst.type = src0.type; 800 801 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); 802 inst->conditional_mod = brw_conditional_for_comparison(op); 803 804 dst.type = BRW_REGISTER_TYPE_D; 805 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); 806} 807 808void 809vec4_visitor::visit(ir_expression *ir) 810{ 811 unsigned int operand; 812 src_reg op[Elements(ir->operands)]; 813 src_reg result_src; 814 dst_reg result_dst; 815 vec4_instruction *inst; 816 817 if (try_emit_sat(ir)) 818 return; 819 820 for (operand = 0; operand < ir->get_num_operands(); operand++) { 821 this->result.file = BAD_FILE; 822 ir->operands[operand]->accept(this); 823 if (this->result.file == BAD_FILE) { 824 printf("Failed to get tree for expression operand:\n"); 825 ir->operands[operand]->print(); 826 exit(1); 827 } 828 op[operand] = this->result; 829 830 /* Matrix expression operands should have been broken down to vector 831 * operations already. 
832 */ 833 assert(!ir->operands[operand]->type->is_matrix()); 834 } 835 836 int vector_elements = ir->operands[0]->type->vector_elements; 837 if (ir->operands[1]) { 838 vector_elements = MAX2(vector_elements, 839 ir->operands[1]->type->vector_elements); 840 } 841 842 this->result.file = BAD_FILE; 843 844 /* Storage for our result. Ideally for an assignment we'd be using 845 * the actual storage for the result here, instead. 846 */ 847 result_src = src_reg(this, ir->type); 848 /* convenience for the emit functions below. */ 849 result_dst = dst_reg(result_src); 850 /* If nothing special happens, this is the result. */ 851 this->result = result_src; 852 /* Limit writes to the channels that will be used by result_src later. 853 * This does limit this temp's use as a temporary for multi-instruction 854 * sequences. 855 */ 856 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 857 858 switch (ir->operation) { 859 case ir_unop_logic_not: 860 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 861 * ones complement of the whole register, not just bit 0. 
862 */ 863 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); 864 break; 865 case ir_unop_neg: 866 op[0].negate = !op[0].negate; 867 this->result = op[0]; 868 break; 869 case ir_unop_abs: 870 op[0].abs = true; 871 op[0].negate = false; 872 this->result = op[0]; 873 break; 874 875 case ir_unop_sign: 876 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); 877 878 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 879 inst->conditional_mod = BRW_CONDITIONAL_G; 880 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); 881 inst->predicate = BRW_PREDICATE_NORMAL; 882 883 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 884 inst->conditional_mod = BRW_CONDITIONAL_L; 885 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); 886 inst->predicate = BRW_PREDICATE_NORMAL; 887 888 break; 889 890 case ir_unop_rcp: 891 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); 892 break; 893 894 case ir_unop_exp2: 895 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); 896 break; 897 case ir_unop_log2: 898 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); 899 break; 900 case ir_unop_exp: 901 case ir_unop_log: 902 assert(!"not reached: should be handled by ir_explog_to_explog2"); 903 break; 904 case ir_unop_sin: 905 case ir_unop_sin_reduced: 906 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); 907 break; 908 case ir_unop_cos: 909 case ir_unop_cos_reduced: 910 emit_math(SHADER_OPCODE_COS, result_dst, op[0]); 911 break; 912 913 case ir_unop_dFdx: 914 case ir_unop_dFdy: 915 assert(!"derivatives not valid in vertex shader"); 916 break; 917 918 case ir_unop_noise: 919 assert(!"not reached: should be handled by lower_noise"); 920 break; 921 922 case ir_binop_add: 923 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); 924 break; 925 case ir_binop_sub: 926 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 927 break; 928 929 case ir_binop_mul: 930 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); 931 break; 932 case ir_binop_div: 933 assert(!"not reached: 
should be handled by ir_div_to_mul_rcp"); 934 case ir_binop_mod: 935 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 936 break; 937 938 case ir_binop_less: 939 case ir_binop_greater: 940 case ir_binop_lequal: 941 case ir_binop_gequal: 942 case ir_binop_equal: 943 case ir_binop_nequal: { 944 dst_reg temp = result_dst; 945 /* original gen4 does implicit conversion before comparison. */ 946 if (intel->gen < 5) 947 temp.type = op[0].type; 948 949 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 950 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 951 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); 952 break; 953 } 954 955 case ir_binop_all_equal: 956 /* "==" operator producing a scalar boolean. */ 957 if (ir->operands[0]->type->is_vector() || 958 ir->operands[1]->type->is_vector()) { 959 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 960 inst->conditional_mod = BRW_CONDITIONAL_Z; 961 962 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 963 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 964 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 965 } else { 966 dst_reg temp = result_dst; 967 /* original gen4 does implicit conversion before comparison. */ 968 if (intel->gen < 5) 969 temp.type = op[0].type; 970 971 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 972 inst->conditional_mod = BRW_CONDITIONAL_NZ; 973 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 974 } 975 break; 976 case ir_binop_any_nequal: 977 /* "!=" operator producing a scalar boolean. 
*/ 978 if (ir->operands[0]->type->is_vector() || 979 ir->operands[1]->type->is_vector()) { 980 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 981 inst->conditional_mod = BRW_CONDITIONAL_NZ; 982 983 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 984 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 985 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 986 } else { 987 dst_reg temp = result_dst; 988 /* original gen4 does implicit conversion before comparison. */ 989 if (intel->gen < 5) 990 temp.type = op[0].type; 991 992 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 993 inst->conditional_mod = BRW_CONDITIONAL_NZ; 994 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 995 } 996 break; 997 998 case ir_unop_any: 999 emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 1000 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 1001 1002 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 1003 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 1004 break; 1005 1006 case ir_binop_logic_xor: 1007 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1008 break; 1009 1010 case ir_binop_logic_or: 1011 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1012 break; 1013 1014 case ir_binop_logic_and: 1015 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1016 break; 1017 1018 case ir_binop_dot: 1019 assert(ir->operands[0]->type->is_vector()); 1020 assert(ir->operands[0]->type == ir->operands[1]->type); 1021 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); 1022 break; 1023 1024 case ir_unop_sqrt: 1025 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); 1026 break; 1027 case ir_unop_rsq: 1028 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); 1029 break; 1030 case ir_unop_i2f: 1031 case ir_unop_i2u: 1032 case ir_unop_u2i: 1033 case ir_unop_u2f: 1034 case ir_unop_b2f: 1035 case ir_unop_b2i: 1036 case ir_unop_f2i: 1037 emit(BRW_OPCODE_MOV, result_dst, op[0]); 1038 break; 1039 case ir_unop_f2b: 1040 case ir_unop_i2b: { 1041 dst_reg temp = result_dst; 1042 /* 
original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
      break;
   }

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      /* ceil(x) == -floor(-x): negate the source, round toward -inf
       * (RNDD), then negate the result.
       */
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
      break;

   case ir_binop_min:
      /* CMP sets the flag register per channel where op[0] < op[1]; the
       * predicated SEL then picks op[0] on those channels, op[1] elsewhere.
       */
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      /* Same CMP+SEL pattern as min, with a greater-than condition. */
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}

void
vec4_visitor::visit(ir_swizzle *ir)
{
   src_reg src;
   int i = 0;
   int swizzle[4];

   /* Note that this is only swizzles in expressions, not those on the left
    * hand side of an assignment, which do write masking.  See ir_assignment
    * for that.
    */

   ir->val->accept(this);
   src = this->result;
   assert(src.file != BAD_FILE);

   /* Compose the IR swizzle on top of whatever swizzle the source value
    * already carries.
    */
   for (i = 0; i < ir->type->vector_elements; i++) {
      switch (i) {
      case 0:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
         break;
      case 1:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
         break;
      case 2:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
         break;
      case 3:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
         break;
      }
   }
   for (; i < 4; i++) {
      /* Replicate the last channel out. */
      swizzle[i] = swizzle[ir->type->vector_elements - 1];
   }

   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}

void
vec4_visitor::visit(ir_dereference_variable *ir)
{
   const struct glsl_type *type = ir->type;
   dst_reg *reg = variable_storage(ir->var);

   if (!reg) {
      fail("Failed to find variable storage for %s\n", ir->var->name);
      this->result = src_reg(brw_null_reg());
      return;
   }

   this->result = src_reg(*reg);

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (type->is_scalar() || type->is_vector() || type->is_matrix())
      this->result.swizzle = swizzle_for_size(type->vector_elements);
}

void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      /* Constant index: fold it directly into the register offset. */
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable-index dereference is not implemented yet; the disabled
       * code below sketches the relative-addressing approach.
       */
#if 0 /* Variable array index */
      /* Variable index array dereference. It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         index_reg = src_reg(this, glsl_type::float_type);

         emit(BRW_OPCODE_MUL, dst_reg(index_reg),
              this->result, src_reg_for_float(element_size));
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
#endif
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}

void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   /* Accumulate the register offset of the named field within the struct. */
   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out.
    */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;
   this->result.type = brw_type_for_base_type(ir->type);

   this->result.reg_offset += offset;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference. If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series conditional moves
    * before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part. We'll ignore
    * swizzles in it and write swizzles using writemask, though.
1267 */ 1268 ir->accept(v); 1269 return dst_reg(v->result); 1270} 1271 1272void 1273vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, 1274 const struct glsl_type *type, bool predicated) 1275{ 1276 if (type->base_type == GLSL_TYPE_STRUCT) { 1277 for (unsigned int i = 0; i < type->length; i++) { 1278 emit_block_move(dst, src, type->fields.structure[i].type, predicated); 1279 } 1280 return; 1281 } 1282 1283 if (type->is_array()) { 1284 for (unsigned int i = 0; i < type->length; i++) { 1285 emit_block_move(dst, src, type->fields.array, predicated); 1286 } 1287 return; 1288 } 1289 1290 if (type->is_matrix()) { 1291 const struct glsl_type *vec_type; 1292 1293 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 1294 type->vector_elements, 1); 1295 1296 for (int i = 0; i < type->matrix_columns; i++) { 1297 emit_block_move(dst, src, vec_type, predicated); 1298 } 1299 return; 1300 } 1301 1302 assert(type->is_scalar() || type->is_vector()); 1303 1304 dst->type = brw_type_for_base_type(type); 1305 src->type = dst->type; 1306 1307 dst->writemask = (1 << type->vector_elements) - 1; 1308 1309 /* Do we need to worry about swizzling a swizzle? */ 1310 assert(src->swizzle = BRW_SWIZZLE_NOOP); 1311 src->swizzle = swizzle_for_size(type->vector_elements); 1312 1313 vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src); 1314 if (predicated) 1315 inst->predicate = BRW_PREDICATE_NORMAL; 1316 1317 dst->reg_offset++; 1318 src->reg_offset++; 1319} 1320 1321void 1322vec4_visitor::visit(ir_assignment *ir) 1323{ 1324 dst_reg dst = get_assignment_lhs(ir->lhs, this); 1325 1326 if (!ir->lhs->type->is_scalar() && 1327 !ir->lhs->type->is_vector()) { 1328 ir->rhs->accept(this); 1329 src_reg src = this->result; 1330 1331 if (ir->condition) { 1332 emit_bool_to_cond_code(ir->condition); 1333 } 1334 1335 emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL); 1336 return; 1337 } 1338 1339 /* Now we're down to just a scalar/vector with writemasks. 
    */
   int i;

   ir->rhs->accept(this);
   src_reg src = this->result;

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
          ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   /* Find the source channel feeding the first enabled destination channel;
    * disabled channels are filled with it below (a harmless swizzle value).
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
         break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
         swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
         swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                              swizzles[2], swizzles[3]);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   /* One (possibly predicated) MOV per register of the LHS value. */
   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
         inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}

/* Emits MOVs of immediate values into *dst for a constant of arbitrary
 * aggregate type, advancing dst->reg_offset one register at a time.
 */
void
vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
{
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      foreach_list(node, &ir->components) {
         ir_constant *field_value = (ir_constant *)node;

         emit_constant_values(dst, field_value);
      }
      return;
   }

   if (ir->type->is_array()) {
      for (unsigned int i = 0; i < ir->type->length; i++) {
         emit_constant_values(dst, ir->array_elements[i]);
      }
      return;
   }

   if (ir->type->is_matrix()) {
      /* Matrix constants are stored column-major in ir->value.f; emit one
       * single-channel MOV per element, one column per register.
       */
      for (int i = 0; i < ir->type->matrix_columns; i++) {
         for (int j = 0; j < ir->type->vector_elements; j++) {
            dst->writemask = 1 << j;
            dst->type = BRW_REGISTER_TYPE_F;

            emit(BRW_OPCODE_MOV, *dst,
                 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
         }
         dst->reg_offset++;
      }
      return;
   }

   /* Scalar/vector constants: one single-channel MOV per component. */
   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst->writemask = 1 << i;
      dst->type = brw_type_for_base_type(ir->type);

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
         break;
      case GLSL_TYPE_INT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
         break;
      case GLSL_TYPE_UINT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
         break;
      case GLSL_TYPE_BOOL:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
         break;
      }
   }
   dst->reg_offset++;
}

void
vec4_visitor::visit(ir_constant *ir)
{
   dst_reg dst = dst_reg(this, ir->type);
   this->result = src_reg(dst);

   emit_constant_values(&dst, ir);
}

/* The following IR node types should have been lowered or are not
 * expected in the vertex shader IR at this point.
 */
void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_texture *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
1486 */ 1487 this->base_ir = ir->condition; 1488 1489 if (intel->gen == 6) { 1490 emit_if_gen6(ir); 1491 } else { 1492 emit_bool_to_cond_code(ir->condition); 1493 vec4_instruction *inst = emit(BRW_OPCODE_IF); 1494 inst->predicate = BRW_PREDICATE_NORMAL; 1495 } 1496 1497 visit_instructions(&ir->then_instructions); 1498 1499 if (!ir->else_instructions.is_empty()) { 1500 this->base_ir = ir->condition; 1501 emit(BRW_OPCODE_ELSE); 1502 1503 visit_instructions(&ir->else_instructions); 1504 } 1505 1506 this->base_ir = ir->condition; 1507 emit(BRW_OPCODE_ENDIF); 1508} 1509 1510int 1511vec4_visitor::emit_vue_header_gen4(int header_mrf) 1512{ 1513 /* Get the position */ 1514 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); 1515 1516 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ 1517 dst_reg ndc = dst_reg(this, glsl_type::vec4_type); 1518 1519 current_annotation = "NDC"; 1520 dst_reg ndc_w = ndc; 1521 ndc_w.writemask = WRITEMASK_W; 1522 src_reg pos_w = pos; 1523 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); 1524 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); 1525 1526 dst_reg ndc_xyz = ndc; 1527 ndc_xyz.writemask = WRITEMASK_XYZ; 1528 1529 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); 1530 1531 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || 1532 c->key.nr_userclip || brw->has_negative_rhw_bug) { 1533 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); 1534 GLuint i; 1535 1536 emit(BRW_OPCODE_MOV, header1, 0u); 1537 1538 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1539 assert(!"finishme: psiz"); 1540 src_reg psiz; 1541 1542 header1.writemask = WRITEMASK_W; 1543 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); 1544 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); 1545 } 1546 1547 for (i = 0; i < c->key.nr_userclip; i++) { 1548 vec4_instruction *inst; 1549 1550 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), 1551 pos, src_reg(c->userplane[i])); 1552 
inst->conditional_mod = BRW_CONDITIONAL_L; 1553 1554 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); 1555 inst->predicate = BRW_PREDICATE_NORMAL; 1556 } 1557 1558 /* i965 clipping workaround: 1559 * 1) Test for -ve rhw 1560 * 2) If set, 1561 * set ndc = (0,0,0,0) 1562 * set ucp[6] = 1 1563 * 1564 * Later, clipping will detect ucp[6] and ensure the primitive is 1565 * clipped against all fixed planes. 1566 */ 1567 if (brw->has_negative_rhw_bug) { 1568#if 0 1569 /* FINISHME */ 1570 brw_CMP(p, 1571 vec8(brw_null_reg()), 1572 BRW_CONDITIONAL_L, 1573 brw_swizzle1(ndc, 3), 1574 brw_imm_f(0)); 1575 1576 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); 1577 brw_MOV(p, ndc, brw_imm_f(0)); 1578 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1579#endif 1580 } 1581 1582 header1.writemask = WRITEMASK_XYZW; 1583 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); 1584 } else { 1585 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), 1586 BRW_REGISTER_TYPE_UD), 0u); 1587 } 1588 1589 if (intel->gen == 5) { 1590 /* There are 20 DWs (D0-D19) in VUE header on Ironlake: 1591 * dword 0-3 (m1) of the header is indices, point width, clip flags. 1592 * dword 4-7 (m2) is the ndc position (set above) 1593 * dword 8-11 (m3) of the vertex header is the 4D space position 1594 * dword 12-19 (m4,m5) of the vertex header is the user clip distance. 1595 * m6 is a pad so that the vertex element data is aligned 1596 * m7 is the first vertex data we fill. 1597 */ 1598 current_annotation = "NDC"; 1599 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1600 1601 current_annotation = "gl_Position"; 1602 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1603 1604 /* user clip distance. */ 1605 header_mrf += 2; 1606 1607 /* Pad so that vertex element data is aligned. */ 1608 header_mrf++; 1609 } else { 1610 /* There are 8 dwords in VUE header pre-Ironlake: 1611 * dword 0-3 (m1) is indices, point width, clip flags. 
1612 * dword 4-7 (m2) is ndc position (set above) 1613 * 1614 * dword 8-11 (m3) is the first vertex data. 1615 */ 1616 current_annotation = "NDC"; 1617 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1618 1619 current_annotation = "gl_Position"; 1620 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1621 } 1622 1623 return header_mrf; 1624} 1625 1626int 1627vec4_visitor::emit_vue_header_gen6(int header_mrf) 1628{ 1629 struct brw_reg reg; 1630 1631 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: 1632 * dword 0-3 (m2) of the header is indices, point width, clip flags. 1633 * dword 4-7 (m3) is the 4D space position 1634 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if 1635 * enabled. 1636 * 1637 * m4 or 6 is the first vertex element data we fill. 1638 */ 1639 1640 current_annotation = "indices, point width, clip flags"; 1641 reg = brw_message_reg(header_mrf++); 1642 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); 1643 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1644 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), 1645 src_reg(output_reg[VERT_RESULT_PSIZ])); 1646 } 1647 1648 current_annotation = "gl_Position"; 1649 emit(BRW_OPCODE_MOV, 1650 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); 1651 1652 current_annotation = "user clip distances"; 1653 if (c->key.nr_userclip) { 1654 for (int i = 0; i < c->key.nr_userclip; i++) { 1655 struct brw_reg m; 1656 if (i < 4) 1657 m = brw_message_reg(header_mrf); 1658 else 1659 m = brw_message_reg(header_mrf + 1); 1660 1661 emit(BRW_OPCODE_DP4, 1662 dst_reg(brw_writemask(m, 1 << (i & 3))), 1663 src_reg(c->userplane[i])); 1664 } 1665 header_mrf += 2; 1666 } 1667 1668 current_annotation = NULL; 1669 1670 return header_mrf; 1671} 1672 1673static int 1674align_interleaved_urb_mlen(struct brw_context *brw, int mlen) 1675{ 1676 struct intel_context *intel = &brw->intel; 1677 1678 if (intel->gen >= 6) { 
1679 /* URB data written (does not include the message header reg) must 1680 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, 1681 * section 5.4.3.2.2: URB_INTERLEAVED. 1682 * 1683 * URB entries are allocated on a multiple of 1024 bits, so an 1684 * extra 128 bits written here to make the end align to 256 is 1685 * no problem. 1686 */ 1687 if ((mlen % 2) != 1) 1688 mlen++; 1689 } 1690 1691 return mlen; 1692} 1693 1694/** 1695 * Generates the VUE payload plus the 1 or 2 URB write instructions to 1696 * complete the VS thread. 1697 * 1698 * The VUE layout is documented in Volume 2a. 1699 */ 1700void 1701vec4_visitor::emit_urb_writes() 1702{ 1703 int base_mrf = 1; 1704 int mrf = base_mrf; 1705 int urb_entry_size; 1706 1707 /* FINISHME: edgeflag */ 1708 1709 /* First mrf is the g0-based message header containing URB handles and such, 1710 * which is implied in VS_OPCODE_URB_WRITE. 1711 */ 1712 mrf++; 1713 1714 if (intel->gen >= 6) { 1715 mrf = emit_vue_header_gen6(mrf); 1716 } else { 1717 mrf = emit_vue_header_gen4(mrf); 1718 } 1719 1720 int attr; 1721 for (attr = 0; attr < VERT_RESULT_MAX; attr++) { 1722 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1723 continue; 1724 1725 /* This is set up in the VUE header. */ 1726 if (attr == VERT_RESULT_HPOS) 1727 continue; 1728 1729 /* This is loaded into the VUE header, and thus doesn't occupy 1730 * an attribute slot. 1731 */ 1732 if (attr == VERT_RESULT_PSIZ) 1733 continue; 1734 1735 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); 1736 1737 /* If this is MRF 15, we can't fit anything more into this URB 1738 * WRITE. Note that base_mrf of 1 means that MRF 15 is an 1739 * even-numbered amount of URB write data, which will meet 1740 * gen6's requirements for length alignment. 
1741 */ 1742 if (mrf == 15) 1743 break; 1744 } 1745 1746 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); 1747 inst->base_mrf = base_mrf; 1748 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); 1749 inst->eot = true; 1750 1751 urb_entry_size = mrf - base_mrf; 1752 1753 for (; attr < VERT_RESULT_MAX; attr++) { 1754 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1755 continue; 1756 fail("Second URB write not supported.\n"); 1757 break; 1758 } 1759 1760 if (intel->gen == 6) 1761 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; 1762 else 1763 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; 1764} 1765 1766vec4_visitor::vec4_visitor(struct brw_vs_compile *c, 1767 struct gl_shader_program *prog, 1768 struct brw_shader *shader) 1769{ 1770 this->c = c; 1771 this->p = &c->func; 1772 this->brw = p->brw; 1773 this->intel = &brw->intel; 1774 this->ctx = &intel->ctx; 1775 this->prog = prog; 1776 this->shader = shader; 1777 1778 this->mem_ctx = ralloc_context(NULL); 1779 this->failed = false; 1780 1781 this->base_ir = NULL; 1782 this->current_annotation = NULL; 1783 1784 this->c = c; 1785 this->vp = brw->vertex_program; /* FINISHME: change for precompile */ 1786 this->prog_data = &c->prog_data; 1787 1788 this->variable_ht = hash_table_ctor(0, 1789 hash_table_pointer_hash, 1790 hash_table_pointer_compare); 1791 1792 this->virtual_grf_sizes = NULL; 1793 this->virtual_grf_count = 0; 1794 this->virtual_grf_array_size = 0; 1795 1796 this->uniforms = 0; 1797 1798 this->variable_ht = hash_table_ctor(0, 1799 hash_table_pointer_hash, 1800 hash_table_pointer_compare); 1801} 1802 1803vec4_visitor::~vec4_visitor() 1804{ 1805 hash_table_dtor(this->variable_ht); 1806} 1807 1808 1809void 1810vec4_visitor::fail(const char *format, ...) 
1811{ 1812 va_list va; 1813 char *msg; 1814 1815 if (failed) 1816 return; 1817 1818 failed = true; 1819 1820 va_start(va, format); 1821 msg = ralloc_vasprintf(mem_ctx, format, va); 1822 va_end(va); 1823 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); 1824 1825 this->fail_msg = msg; 1826 1827 if (INTEL_DEBUG & DEBUG_VS) { 1828 fprintf(stderr, "%s", msg); 1829 } 1830} 1831 1832} /* namespace brw */ 1833