brw_vec4_visitor.cpp revision 930afd1774bdcd013bccbd7b5717ae0bb8e3dea3
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 */ 23 24#include "brw_vec4.h" 25extern "C" { 26#include "main/macros.h" 27#include "program/prog_parameter.h" 28} 29 30namespace brw { 31 32src_reg::src_reg(dst_reg reg) 33{ 34 init(); 35 36 this->file = reg.file; 37 this->reg = reg.reg; 38 this->reg_offset = reg.reg_offset; 39 this->type = reg.type; 40 41 int swizzles[4]; 42 int next_chan = 0; 43 int last = 0; 44 45 for (int i = 0; i < 4; i++) { 46 if (!(reg.writemask & (1 << i))) 47 continue; 48 49 swizzles[next_chan++] = last = i; 50 } 51 52 for (; next_chan < 4; next_chan++) { 53 swizzles[next_chan] = last; 54 } 55 56 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 57 swizzles[2], swizzles[3]); 58} 59 60dst_reg::dst_reg(src_reg reg) 61{ 62 init(); 63 64 this->file = reg.file; 65 this->reg = reg.reg; 66 this->reg_offset = reg.reg_offset; 67 this->type = reg.type; 68 this->writemask = WRITEMASK_XYZW; 69} 70 71vec4_instruction * 72vec4_visitor::emit(enum opcode opcode, dst_reg dst, 73 src_reg src0, src_reg src1, src_reg src2) 74{ 75 vec4_instruction *inst = new(mem_ctx) vec4_instruction(); 76 77 inst->opcode = opcode; 78 inst->dst = dst; 79 inst->src[0] = src0; 80 inst->src[1] = src1; 81 inst->src[2] = src2; 82 inst->ir = this->base_ir; 83 inst->annotation = this->current_annotation; 84 85 this->instructions.push_tail(inst); 86 87 return inst; 88} 89 90 91vec4_instruction * 92vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) 93{ 94 return emit(opcode, dst, src0, src1, src_reg()); 95} 96 97vec4_instruction * 98vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) 99{ 100 assert(dst.writemask != 0); 101 return emit(opcode, dst, src0, src_reg(), src_reg()); 102} 103 104vec4_instruction * 105vec4_visitor::emit(enum opcode opcode) 106{ 107 return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); 108} 109 110void 111vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) 112{ 113 static enum opcode dot_opcodes[] = { 114 BRW_OPCODE_DP2, 
BRW_OPCODE_DP3, BRW_OPCODE_DP4 115 }; 116 117 emit(dot_opcodes[elements - 2], dst, src0, src1); 118} 119 120void 121vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) 122{ 123 /* The gen6 math instruction ignores the source modifiers -- 124 * swizzle, abs, negate, and at least some parts of the register 125 * region description. 126 */ 127 src_reg temp_src = src_reg(this, glsl_type::vec4_type); 128 emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); 129 130 emit(opcode, dst, temp_src); 131} 132 133void 134vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) 135{ 136 vec4_instruction *inst = emit(opcode, dst, src); 137 inst->base_mrf = 1; 138 inst->mlen = 1; 139} 140 141void 142vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) 143{ 144 switch (opcode) { 145 case SHADER_OPCODE_RCP: 146 case SHADER_OPCODE_RSQ: 147 case SHADER_OPCODE_SQRT: 148 case SHADER_OPCODE_EXP2: 149 case SHADER_OPCODE_LOG2: 150 case SHADER_OPCODE_SIN: 151 case SHADER_OPCODE_COS: 152 break; 153 default: 154 assert(!"not reached: bad math opcode"); 155 return; 156 } 157 158 if (intel->gen >= 6) { 159 return emit_math1_gen6(opcode, dst, src); 160 } else { 161 return emit_math1_gen4(opcode, dst, src); 162 } 163} 164 165void 166vec4_visitor::emit_math2_gen6(enum opcode opcode, 167 dst_reg dst, src_reg src0, src_reg src1) 168{ 169 src_reg expanded; 170 171 /* The gen6 math instruction ignores the source modifiers -- 172 * swizzle, abs, negate, and at least some parts of the register 173 * region description. Move the sources to temporaries to make it 174 * generally work. 
175 */ 176 177 expanded = src_reg(this, glsl_type::vec4_type); 178 emit(BRW_OPCODE_MOV, dst, src0); 179 src0 = expanded; 180 181 expanded = src_reg(this, glsl_type::vec4_type); 182 emit(BRW_OPCODE_MOV, dst, src1); 183 src1 = expanded; 184 185 emit(opcode, dst, src0, src1); 186} 187 188void 189vec4_visitor::emit_math2_gen4(enum opcode opcode, 190 dst_reg dst, src_reg src0, src_reg src1) 191{ 192 vec4_instruction *inst = emit(opcode, dst, src0, src1); 193 inst->base_mrf = 1; 194 inst->mlen = 2; 195} 196 197void 198vec4_visitor::emit_math(enum opcode opcode, 199 dst_reg dst, src_reg src0, src_reg src1) 200{ 201 assert(opcode == SHADER_OPCODE_POW); 202 203 if (intel->gen >= 6) { 204 return emit_math2_gen6(opcode, dst, src0, src1); 205 } else { 206 return emit_math2_gen4(opcode, dst, src0, src1); 207 } 208} 209 210void 211vec4_visitor::visit_instructions(const exec_list *list) 212{ 213 foreach_iter(exec_list_iterator, iter, *list) { 214 ir_instruction *ir = (ir_instruction *)iter.get(); 215 216 base_ir = ir; 217 ir->accept(this); 218 } 219} 220 221 222static int 223type_size(const struct glsl_type *type) 224{ 225 unsigned int i; 226 int size; 227 228 switch (type->base_type) { 229 case GLSL_TYPE_UINT: 230 case GLSL_TYPE_INT: 231 case GLSL_TYPE_FLOAT: 232 case GLSL_TYPE_BOOL: 233 if (type->is_matrix()) { 234 return type->matrix_columns; 235 } else { 236 /* Regardless of size of vector, it gets a vec4. This is bad 237 * packing for things like floats, but otherwise arrays become a 238 * mess. Hopefully a later pass over the code can pack scalars 239 * down if appropriate. 
240 */ 241 return 1; 242 } 243 case GLSL_TYPE_ARRAY: 244 assert(type->length > 0); 245 return type_size(type->fields.array) * type->length; 246 case GLSL_TYPE_STRUCT: 247 size = 0; 248 for (i = 0; i < type->length; i++) { 249 size += type_size(type->fields.structure[i].type); 250 } 251 return size; 252 case GLSL_TYPE_SAMPLER: 253 /* Samplers take up one slot in UNIFORMS[], but they're baked in 254 * at link time. 255 */ 256 return 1; 257 default: 258 assert(0); 259 return 0; 260 } 261} 262 263int 264vec4_visitor::virtual_grf_alloc(int size) 265{ 266 if (virtual_grf_array_size <= virtual_grf_count) { 267 if (virtual_grf_array_size == 0) 268 virtual_grf_array_size = 16; 269 else 270 virtual_grf_array_size *= 2; 271 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, 272 virtual_grf_array_size); 273 } 274 virtual_grf_sizes[virtual_grf_count] = size; 275 return virtual_grf_count++; 276} 277 278src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) 279{ 280 init(); 281 282 this->file = GRF; 283 this->reg = v->virtual_grf_alloc(type_size(type)); 284 285 if (type->is_array() || type->is_record()) { 286 this->swizzle = BRW_SWIZZLE_NOOP; 287 } else { 288 this->swizzle = swizzle_for_size(type->vector_elements); 289 } 290 291 this->type = brw_type_for_base_type(type); 292} 293 294dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) 295{ 296 init(); 297 298 this->file = GRF; 299 this->reg = v->virtual_grf_alloc(type_size(type)); 300 301 if (type->is_array() || type->is_record()) { 302 this->writemask = WRITEMASK_XYZW; 303 } else { 304 this->writemask = (1 << type->vector_elements) - 1; 305 } 306 307 this->type = brw_type_for_base_type(type); 308} 309 310/* Our support for uniforms is piggy-backed on the struct 311 * gl_fragment_program, because that's where the values actually 312 * get stored, rather than in some global gl_shader_program uniform 313 * store. 
314 */ 315int 316vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) 317{ 318 unsigned int offset = 0; 319 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; 320 321 if (type->is_matrix()) { 322 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 323 type->vector_elements, 324 1); 325 326 for (unsigned int i = 0; i < type->matrix_columns; i++) { 327 offset += setup_uniform_values(loc + offset, column); 328 } 329 330 return offset; 331 } 332 333 switch (type->base_type) { 334 case GLSL_TYPE_FLOAT: 335 case GLSL_TYPE_UINT: 336 case GLSL_TYPE_INT: 337 case GLSL_TYPE_BOOL: 338 for (unsigned int i = 0; i < type->vector_elements; i++) { 339 int slot = this->uniforms * 4 + i; 340 switch (type->base_type) { 341 case GLSL_TYPE_FLOAT: 342 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 343 break; 344 case GLSL_TYPE_UINT: 345 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; 346 break; 347 case GLSL_TYPE_INT: 348 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; 349 break; 350 case GLSL_TYPE_BOOL: 351 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; 352 break; 353 default: 354 assert(!"not reached"); 355 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 356 break; 357 } 358 c->prog_data.param[slot] = &values[i]; 359 } 360 361 for (unsigned int i = type->vector_elements; i < 4; i++) { 362 c->prog_data.param_convert[this->uniforms * 4 + i] = 363 PARAM_CONVERT_ZERO; 364 c->prog_data.param[this->uniforms * 4 + i] = NULL; 365 } 366 367 this->uniform_size[this->uniforms] = type->vector_elements; 368 this->uniforms++; 369 370 return 1; 371 372 case GLSL_TYPE_STRUCT: 373 for (unsigned int i = 0; i < type->length; i++) { 374 offset += setup_uniform_values(loc + offset, 375 type->fields.structure[i].type); 376 } 377 return offset; 378 379 case GLSL_TYPE_ARRAY: 380 for (unsigned int i = 0; i < type->length; i++) { 381 offset += setup_uniform_values(loc + offset, type->fields.array); 382 } 383 return offset; 384 385 
case GLSL_TYPE_SAMPLER: 386 /* The sampler takes up a slot, but we don't use any values from it. */ 387 return 1; 388 389 default: 390 assert(!"not reached"); 391 return 0; 392 } 393} 394 395/* Our support for builtin uniforms is even scarier than non-builtin. 396 * It sits on top of the PROG_STATE_VAR parameters that are 397 * automatically updated from GL context state. 398 */ 399void 400vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) 401{ 402 const ir_state_slot *const slots = ir->state_slots; 403 assert(ir->state_slots != NULL); 404 405 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 406 /* This state reference has already been setup by ir_to_mesa, 407 * but we'll get the same index back here. We can reference 408 * ParameterValues directly, since unlike brw_fs.cpp, we never 409 * add new state references during compile. 410 */ 411 int index = _mesa_add_state_reference(this->vp->Base.Parameters, 412 (gl_state_index *)slots[i].tokens); 413 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; 414 415 this->uniform_size[this->uniforms] = 0; 416 /* Add each of the unique swizzled channels of the element. 417 * This will end up matching the size of the glsl_type of this field. 
418 */ 419 int last_swiz = -1; 420 for (unsigned int j = 0; j < 4; j++) { 421 int swiz = GET_SWZ(slots[i].swizzle, j); 422 if (swiz == last_swiz) 423 break; 424 last_swiz = swiz; 425 426 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; 427 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; 428 this->uniform_size[this->uniforms]++; 429 } 430 this->uniforms++; 431 } 432} 433 434dst_reg * 435vec4_visitor::variable_storage(ir_variable *var) 436{ 437 return (dst_reg *)hash_table_find(this->variable_ht, var); 438} 439 440void 441vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 442{ 443 ir_expression *expr = ir->as_expression(); 444 445 if (expr) { 446 src_reg op[2]; 447 vec4_instruction *inst; 448 449 assert(expr->get_num_operands() <= 2); 450 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 451 assert(expr->operands[i]->type->is_scalar()); 452 453 expr->operands[i]->accept(this); 454 op[i] = this->result; 455 } 456 457 switch (expr->operation) { 458 case ir_unop_logic_not: 459 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); 460 inst->conditional_mod = BRW_CONDITIONAL_Z; 461 break; 462 463 case ir_binop_logic_xor: 464 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); 465 inst->conditional_mod = BRW_CONDITIONAL_NZ; 466 break; 467 468 case ir_binop_logic_or: 469 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); 470 inst->conditional_mod = BRW_CONDITIONAL_NZ; 471 break; 472 473 case ir_binop_logic_and: 474 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); 475 inst->conditional_mod = BRW_CONDITIONAL_NZ; 476 break; 477 478 case ir_unop_f2b: 479 if (intel->gen >= 6) { 480 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); 481 } else { 482 inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); 483 } 484 inst->conditional_mod = BRW_CONDITIONAL_NZ; 485 break; 486 487 case ir_unop_i2b: 488 if (intel->gen >= 6) { 489 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 490 } else { 
491 inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); 492 } 493 inst->conditional_mod = BRW_CONDITIONAL_NZ; 494 break; 495 496 case ir_binop_greater: 497 case ir_binop_gequal: 498 case ir_binop_less: 499 case ir_binop_lequal: 500 case ir_binop_equal: 501 case ir_binop_all_equal: 502 case ir_binop_nequal: 503 case ir_binop_any_nequal: 504 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 505 inst->conditional_mod = 506 brw_conditional_for_comparison(expr->operation); 507 break; 508 509 default: 510 assert(!"not reached"); 511 break; 512 } 513 return; 514 } 515 516 ir->accept(this); 517 518 if (intel->gen >= 6) { 519 vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), 520 this->result, src_reg(1)); 521 inst->conditional_mod = BRW_CONDITIONAL_NZ; 522 } else { 523 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); 524 inst->conditional_mod = BRW_CONDITIONAL_NZ; 525 } 526} 527 528/** 529 * Emit a gen6 IF statement with the comparison folded into the IF 530 * instruction. 
531 */ 532void 533vec4_visitor::emit_if_gen6(ir_if *ir) 534{ 535 ir_expression *expr = ir->condition->as_expression(); 536 537 if (expr) { 538 src_reg op[2]; 539 vec4_instruction *inst; 540 dst_reg temp; 541 542 assert(expr->get_num_operands() <= 2); 543 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 544 assert(expr->operands[i]->type->is_scalar() || 545 expr->operation == ir_binop_any_nequal || 546 expr->operation == ir_binop_all_equal); 547 548 expr->operands[i]->accept(this); 549 op[i] = this->result; 550 } 551 552 switch (expr->operation) { 553 case ir_unop_logic_not: 554 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 555 inst->conditional_mod = BRW_CONDITIONAL_Z; 556 return; 557 558 case ir_binop_logic_xor: 559 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 560 inst->conditional_mod = BRW_CONDITIONAL_NZ; 561 return; 562 563 case ir_binop_logic_or: 564 temp = dst_reg(this, glsl_type::bool_type); 565 emit(BRW_OPCODE_OR, temp, op[0], op[1]); 566 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 567 inst->conditional_mod = BRW_CONDITIONAL_NZ; 568 return; 569 570 case ir_binop_logic_and: 571 temp = dst_reg(this, glsl_type::bool_type); 572 emit(BRW_OPCODE_AND, temp, op[0], op[1]); 573 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 574 inst->conditional_mod = BRW_CONDITIONAL_NZ; 575 return; 576 577 case ir_unop_f2b: 578 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); 579 inst->conditional_mod = BRW_CONDITIONAL_NZ; 580 return; 581 582 case ir_unop_i2b: 583 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 584 inst->conditional_mod = BRW_CONDITIONAL_NZ; 585 return; 586 587 case ir_binop_greater: 588 case ir_binop_gequal: 589 case ir_binop_less: 590 case ir_binop_lequal: 591 case ir_binop_equal: 592 case ir_binop_nequal: 593 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 594 inst->conditional_mod = 595 brw_conditional_for_comparison(expr->operation); 596 
return; 597 598 case ir_binop_all_equal: 599 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); 600 inst->conditional_mod = BRW_CONDITIONAL_Z; 601 602 inst = emit(BRW_OPCODE_IF); 603 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 604 return; 605 606 case ir_binop_any_nequal: 607 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); 608 inst->conditional_mod = BRW_CONDITIONAL_NZ; 609 610 inst = emit(BRW_OPCODE_IF); 611 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 612 return; 613 614 default: 615 assert(!"not reached"); 616 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 617 inst->conditional_mod = BRW_CONDITIONAL_NZ; 618 return; 619 } 620 return; 621 } 622 623 ir->condition->accept(this); 624 625 vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), 626 this->result, src_reg(0)); 627 inst->conditional_mod = BRW_CONDITIONAL_NZ; 628} 629 630void 631vec4_visitor::visit(ir_variable *ir) 632{ 633 dst_reg *reg = NULL; 634 635 if (variable_storage(ir)) 636 return; 637 638 switch (ir->mode) { 639 case ir_var_in: 640 reg = new(mem_ctx) dst_reg(ATTR, ir->location); 641 break; 642 643 case ir_var_out: 644 reg = new(mem_ctx) dst_reg(this, ir->type); 645 646 for (int i = 0; i < type_size(ir->type); i++) { 647 output_reg[ir->location + i] = *reg; 648 output_reg[ir->location + i].reg_offset = i; 649 } 650 break; 651 652 case ir_var_auto: 653 case ir_var_temporary: 654 reg = new(mem_ctx) dst_reg(this, ir->type); 655 break; 656 657 case ir_var_uniform: 658 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); 659 660 if (!strncmp(ir->name, "gl_", 3)) { 661 setup_builtin_uniform_values(ir); 662 } else { 663 setup_uniform_values(ir->location, ir->type); 664 } 665 break; 666 667 default: 668 assert(!"not reached"); 669 } 670 671 reg->type = brw_type_for_base_type(ir->type); 672 hash_table_insert(this->variable_ht, reg, ir); 673} 674 675void 676vec4_visitor::visit(ir_loop *ir) 677{ 678 ir_dereference_variable *counter = NULL; 679 680 fail("not yet\n"); 
681 682 /* We don't want debugging output to print the whole body of the 683 * loop as the annotation. 684 */ 685 this->base_ir = NULL; 686 687 if (ir->counter != NULL) 688 counter = new(ir) ir_dereference_variable(ir->counter); 689 690 if (ir->from != NULL) { 691 assert(ir->counter != NULL); 692 693 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); 694 695 a->accept(this); 696 delete a; 697 } 698 699 emit(BRW_OPCODE_DO); 700 701 if (ir->to) { 702 ir_expression *e = 703 new(ir) ir_expression(ir->cmp, glsl_type::bool_type, 704 counter, ir->to); 705 ir_if *if_stmt = new(ir) ir_if(e); 706 707 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); 708 709 if_stmt->then_instructions.push_tail(brk); 710 711 if_stmt->accept(this); 712 713 delete if_stmt; 714 delete e; 715 delete brk; 716 } 717 718 visit_instructions(&ir->body_instructions); 719 720 if (ir->increment) { 721 ir_expression *e = 722 new(ir) ir_expression(ir_binop_add, counter->type, 723 counter, ir->increment); 724 725 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); 726 727 a->accept(this); 728 delete a; 729 delete e; 730 } 731 732 emit(BRW_OPCODE_WHILE); 733} 734 735void 736vec4_visitor::visit(ir_loop_jump *ir) 737{ 738 switch (ir->mode) { 739 case ir_loop_jump::jump_break: 740 emit(BRW_OPCODE_BREAK); 741 break; 742 case ir_loop_jump::jump_continue: 743 emit(BRW_OPCODE_CONTINUE); 744 break; 745 } 746} 747 748 749void 750vec4_visitor::visit(ir_function_signature *ir) 751{ 752 assert(0); 753 (void)ir; 754} 755 756void 757vec4_visitor::visit(ir_function *ir) 758{ 759 /* Ignore function bodies other than main() -- we shouldn't see calls to 760 * them since they should all be inlined. 
761 */ 762 if (strcmp(ir->name, "main") == 0) { 763 const ir_function_signature *sig; 764 exec_list empty; 765 766 sig = ir->matching_signature(&empty); 767 768 assert(sig); 769 770 visit_instructions(&sig->body); 771 } 772} 773 774GLboolean 775vec4_visitor::try_emit_sat(ir_expression *ir) 776{ 777 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 778 if (!sat_src) 779 return false; 780 781 sat_src->accept(this); 782 src_reg src = this->result; 783 784 this->result = src_reg(this, ir->type); 785 vec4_instruction *inst; 786 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); 787 inst->saturate = true; 788 789 return true; 790} 791 792void 793vec4_visitor::emit_bool_comparison(unsigned int op, 794 dst_reg dst, src_reg src0, src_reg src1) 795{ 796 /* original gen4 does destination conversion before comparison. */ 797 if (intel->gen < 5) 798 dst.type = src0.type; 799 800 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); 801 inst->conditional_mod = brw_conditional_for_comparison(op); 802 803 dst.type = BRW_REGISTER_TYPE_D; 804 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); 805} 806 807void 808vec4_visitor::visit(ir_expression *ir) 809{ 810 unsigned int operand; 811 src_reg op[Elements(ir->operands)]; 812 src_reg result_src; 813 dst_reg result_dst; 814 vec4_instruction *inst; 815 816 if (try_emit_sat(ir)) 817 return; 818 819 for (operand = 0; operand < ir->get_num_operands(); operand++) { 820 this->result.file = BAD_FILE; 821 ir->operands[operand]->accept(this); 822 if (this->result.file == BAD_FILE) { 823 printf("Failed to get tree for expression operand:\n"); 824 ir->operands[operand]->print(); 825 exit(1); 826 } 827 op[operand] = this->result; 828 829 /* Matrix expression operands should have been broken down to vector 830 * operations already. 
831 */ 832 assert(!ir->operands[operand]->type->is_matrix()); 833 } 834 835 int vector_elements = ir->operands[0]->type->vector_elements; 836 if (ir->operands[1]) { 837 vector_elements = MAX2(vector_elements, 838 ir->operands[1]->type->vector_elements); 839 } 840 841 this->result.file = BAD_FILE; 842 843 /* Storage for our result. Ideally for an assignment we'd be using 844 * the actual storage for the result here, instead. 845 */ 846 result_src = src_reg(this, ir->type); 847 /* convenience for the emit functions below. */ 848 result_dst = dst_reg(result_src); 849 /* If nothing special happens, this is the result. */ 850 this->result = result_src; 851 /* Limit writes to the channels that will be used by result_src later. 852 * This does limit this temp's use as a temporary for multi-instruction 853 * sequences. 854 */ 855 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 856 857 switch (ir->operation) { 858 case ir_unop_logic_not: 859 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 860 * ones complement of the whole register, not just bit 0. 
861 */ 862 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); 863 break; 864 case ir_unop_neg: 865 op[0].negate = !op[0].negate; 866 this->result = op[0]; 867 break; 868 case ir_unop_abs: 869 op[0].abs = true; 870 op[0].negate = false; 871 this->result = op[0]; 872 break; 873 874 case ir_unop_sign: 875 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); 876 877 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 878 inst->conditional_mod = BRW_CONDITIONAL_G; 879 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); 880 inst->predicate = BRW_PREDICATE_NORMAL; 881 882 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 883 inst->conditional_mod = BRW_CONDITIONAL_L; 884 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); 885 inst->predicate = BRW_PREDICATE_NORMAL; 886 887 break; 888 889 case ir_unop_rcp: 890 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); 891 break; 892 893 case ir_unop_exp2: 894 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); 895 break; 896 case ir_unop_log2: 897 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); 898 break; 899 case ir_unop_exp: 900 case ir_unop_log: 901 assert(!"not reached: should be handled by ir_explog_to_explog2"); 902 break; 903 case ir_unop_sin: 904 case ir_unop_sin_reduced: 905 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); 906 break; 907 case ir_unop_cos: 908 case ir_unop_cos_reduced: 909 emit_math(SHADER_OPCODE_COS, result_dst, op[0]); 910 break; 911 912 case ir_unop_dFdx: 913 case ir_unop_dFdy: 914 assert(!"derivatives not valid in vertex shader"); 915 break; 916 917 case ir_unop_noise: 918 assert(!"not reached: should be handled by lower_noise"); 919 break; 920 921 case ir_binop_add: 922 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); 923 break; 924 case ir_binop_sub: 925 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 926 break; 927 928 case ir_binop_mul: 929 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); 930 break; 931 case ir_binop_div: 932 assert(!"not reached: 
should be handled by ir_div_to_mul_rcp"); 933 case ir_binop_mod: 934 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 935 break; 936 937 case ir_binop_less: 938 case ir_binop_greater: 939 case ir_binop_lequal: 940 case ir_binop_gequal: 941 case ir_binop_equal: 942 case ir_binop_nequal: { 943 dst_reg temp = result_dst; 944 /* original gen4 does implicit conversion before comparison. */ 945 if (intel->gen < 5) 946 temp.type = op[0].type; 947 948 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 949 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 950 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); 951 break; 952 } 953 954 case ir_binop_all_equal: 955 /* "==" operator producing a scalar boolean. */ 956 if (ir->operands[0]->type->is_vector() || 957 ir->operands[1]->type->is_vector()) { 958 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 959 inst->conditional_mod = BRW_CONDITIONAL_Z; 960 961 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 962 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 963 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 964 } else { 965 dst_reg temp = result_dst; 966 /* original gen4 does implicit conversion before comparison. */ 967 if (intel->gen < 5) 968 temp.type = op[0].type; 969 970 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 971 inst->conditional_mod = BRW_CONDITIONAL_NZ; 972 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 973 } 974 break; 975 case ir_binop_any_nequal: 976 /* "!=" operator producing a scalar boolean. 
*/ 977 if (ir->operands[0]->type->is_vector() || 978 ir->operands[1]->type->is_vector()) { 979 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 980 inst->conditional_mod = BRW_CONDITIONAL_NZ; 981 982 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 983 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 984 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 985 } else { 986 dst_reg temp = result_dst; 987 /* original gen4 does implicit conversion before comparison. */ 988 if (intel->gen < 5) 989 temp.type = op[0].type; 990 991 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 992 inst->conditional_mod = BRW_CONDITIONAL_NZ; 993 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 994 } 995 break; 996 997 case ir_unop_any: 998 emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 999 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 1000 1001 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 1002 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 1003 break; 1004 1005 case ir_binop_logic_xor: 1006 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1007 break; 1008 1009 case ir_binop_logic_or: 1010 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1011 break; 1012 1013 case ir_binop_logic_and: 1014 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1015 break; 1016 1017 case ir_binop_dot: 1018 assert(ir->operands[0]->type->is_vector()); 1019 assert(ir->operands[0]->type == ir->operands[1]->type); 1020 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); 1021 break; 1022 1023 case ir_unop_sqrt: 1024 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); 1025 break; 1026 case ir_unop_rsq: 1027 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); 1028 break; 1029 case ir_unop_i2f: 1030 case ir_unop_i2u: 1031 case ir_unop_u2i: 1032 case ir_unop_u2f: 1033 case ir_unop_b2f: 1034 case ir_unop_b2i: 1035 case ir_unop_f2i: 1036 emit(BRW_OPCODE_MOV, result_dst, op[0]); 1037 break; 1038 case ir_unop_f2b: 1039 case ir_unop_i2b: { 1040 dst_reg temp = result_dst; 1041 /* 
original gen4 does implicit conversion before comparison. */ 1042 if (intel->gen < 5) 1043 temp.type = op[0].type; 1044 1045 inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); 1046 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1047 inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); 1048 break; 1049 } 1050 1051 case ir_unop_trunc: 1052 emit(BRW_OPCODE_RNDZ, result_dst, op[0]); 1053 break; 1054 case ir_unop_ceil: 1055 op[0].negate = !op[0].negate; 1056 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1057 this->result.negate = true; 1058 break; 1059 case ir_unop_floor: 1060 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1061 break; 1062 case ir_unop_fract: 1063 inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); 1064 break; 1065 case ir_unop_round_even: 1066 emit(BRW_OPCODE_RNDE, result_dst, op[0]); 1067 break; 1068 1069 case ir_binop_min: 1070 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1071 inst->conditional_mod = BRW_CONDITIONAL_L; 1072 1073 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1074 inst->predicate = BRW_PREDICATE_NORMAL; 1075 break; 1076 case ir_binop_max: 1077 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1078 inst->conditional_mod = BRW_CONDITIONAL_G; 1079 1080 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1081 inst->predicate = BRW_PREDICATE_NORMAL; 1082 break; 1083 1084 case ir_binop_pow: 1085 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); 1086 break; 1087 1088 case ir_unop_bit_not: 1089 inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); 1090 break; 1091 case ir_binop_bit_and: 1092 inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1093 break; 1094 case ir_binop_bit_xor: 1095 inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1096 break; 1097 case ir_binop_bit_or: 1098 inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1099 break; 1100 1101 case ir_binop_lshift: 1102 case ir_binop_rshift: 1103 assert(!"GLSL 1.30 features unsupported"); 1104 break; 1105 1106 case ir_quadop_vector: 
1107 assert(!"not reached: should be handled by lower_quadop_vector"); 1108 break; 1109 } 1110} 1111 1112 1113void 1114vec4_visitor::visit(ir_swizzle *ir) 1115{ 1116 src_reg src; 1117 int i = 0; 1118 int swizzle[4]; 1119 1120 /* Note that this is only swizzles in expressions, not those on the left 1121 * hand side of an assignment, which do write masking. See ir_assignment 1122 * for that. 1123 */ 1124 1125 ir->val->accept(this); 1126 src = this->result; 1127 assert(src.file != BAD_FILE); 1128 1129 for (i = 0; i < ir->type->vector_elements; i++) { 1130 switch (i) { 1131 case 0: 1132 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); 1133 break; 1134 case 1: 1135 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); 1136 break; 1137 case 2: 1138 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); 1139 break; 1140 case 3: 1141 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); 1142 break; 1143 } 1144 } 1145 for (; i < 4; i++) { 1146 /* Replicate the last channel out. */ 1147 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1148 } 1149 1150 src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1151 1152 this->result = src; 1153} 1154 1155void 1156vec4_visitor::visit(ir_dereference_variable *ir) 1157{ 1158 const struct glsl_type *type = ir->type; 1159 dst_reg *reg = variable_storage(ir->var); 1160 1161 if (!reg) { 1162 fail("Failed to find variable storage for %s\n", ir->var->name); 1163 this->result = src_reg(brw_null_reg()); 1164 return; 1165 } 1166 1167 this->result = src_reg(*reg); 1168 1169 if (type->is_scalar() || type->is_vector() || type->is_matrix()) 1170 this->result.swizzle = swizzle_for_size(type->vector_elements); 1171} 1172 1173void 1174vec4_visitor::visit(ir_dereference_array *ir) 1175{ 1176 ir_constant *constant_index; 1177 src_reg src; 1178 int element_size = type_size(ir->type); 1179 1180 constant_index = ir->array_index->constant_expression_value(); 1181 1182 ir->array->accept(this); 1183 src = this->result; 1184 1185 if 
(constant_index) {
      /* Constant index: fold it directly into the register offset. */
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
#if 0 /* Variable array index */
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         index_reg = src_reg(this, glsl_type::float_type);

         emit(BRW_OPCODE_MUL, dst_reg(index_reg),
              this->result, src_reg_for_float(element_size));
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
#endif
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}

/* Resolve a structure-field reference: walk the struct's fields,
 * accumulating register offsets, until the named field is found.
 */
void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out.
    */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;
   this->result.type = brw_type_for_base_type(ir->type);

   this->result.reg_offset += offset;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series conditional moves
    * before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return dst_reg(v->result);
}

/* Copy an aggregate (non-scalar/vector) RHS to the LHS storage one
 * register at a time, optionally predicated on ir->condition.
 */
void
vec4_visitor::emit_block_move(ir_assignment *ir)
{
   ir->rhs->accept(this);
   src_reg src = this->result;

   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).
    */
   dst.writemask = WRITEMASK_XYZW;

   for (int i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
      if (ir->condition)
         inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}

void
vec4_visitor::visit(ir_assignment *ir)
{
   /* Aggregate assignments carry no writemask; copy register by register. */
   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      emit_block_move(ir);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;

   ir->rhs->accept(this);
   src_reg src = this->result;

   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
          ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   /* Find the source channel of the first written component; it fills
    * the swizzle slots of components that aren't written below.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
         break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
         swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
         swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                              swizzles[2], swizzles[3]);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
         inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}


/* Materialize an IR constant (struct, array, matrix, or scalar/vector)
 * into freshly allocated registers and leave it in this->result.
 */
void
vec4_visitor::visit(ir_constant *ir)
{
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      src_reg temp_base = src_reg(this, ir->type);
      dst_reg temp = dst_reg(temp_base);

      /* Emit each field's constant value into consecutive registers. */
      foreach_iter(exec_list_iterator, iter, ir->components) {
         ir_constant *field_value = (ir_constant *)iter.get();
         int size = type_size(field_value->type);

         assert(size > 0);

         field_value->accept(this);
         src_reg src = this->result;

         for (int i = 0; i < (unsigned int)size; i++) {
            emit(BRW_OPCODE_MOV, temp, src);

            src.reg_offset++;
            temp.reg_offset++;
         }
      }
      this->result = temp_base;
      return;
   }

   if (ir->type->is_array()) {
      src_reg temp_base = src_reg(this, ir->type);
      dst_reg temp = dst_reg(temp_base);
      int size = type_size(ir->type->fields.array);

      assert(size > 0);

      for (unsigned int i = 0; i < ir->type->length; i++) {
         ir->array_elements[i]->accept(this);
         src_reg src = this->result;
         for (int j = 0; j < size; j++) {
            emit(BRW_OPCODE_MOV, temp, src);

            src.reg_offset++;
            temp.reg_offset++;
         }
      }
      this->result = temp_base;
      return;
   }

   if (ir->type->is_matrix()) {
      this->result = src_reg(this, ir->type);
      dst_reg dst = dst_reg(this->result);

      assert(ir->type->base_type == GLSL_TYPE_FLOAT);

      /* One register per matrix column, one immediate MOV per element. */
      for (int i = 0; i < ir->type->matrix_columns; i++) {
         for (int j = 0; j < ir->type->vector_elements; j++) {
            dst.writemask = 1 << j;
            emit(BRW_OPCODE_MOV, dst,
                 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
         }
         dst.reg_offset++;
      }
      return;
   }

   this->result = src_reg(this, ir->type);
   dst_reg dst = dst_reg(this->result);

   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst.writemask = 1 << i;

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
         break;
      case GLSL_TYPE_INT:
         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
         break;
      case GLSL_TYPE_UINT:
         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
         break;
      case GLSL_TYPE_BOOL:
         emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
         break;
      }
   }
}

/* These IR node types are not expected to reach the vec4 backend. */
void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_texture *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);
      vec4_instruction *inst = emit(BRW_OPCODE_IF);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->then_instructions);

   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}

/* Emit the pre-gen6 VUE header (NDC position, clip flags, point size)
 * into message registers starting at header_mrf.  Returns the next
 * free MRF.
 */
int
vec4_visitor::emit_vue_header_gen4(int header_mrf)
{
   /* Get the position */
   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);

   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);

   current_annotation = "NDC";
   dst_reg ndc_w = ndc;
   ndc_w.writemask = WRITEMASK_W;
   src_reg pos_w = pos;
   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);

   dst_reg ndc_xyz = ndc;
   ndc_xyz.writemask = WRITEMASK_XYZ;

   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));

   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
       c->key.nr_userclip || brw->has_negative_rhw_bug) {
      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
      GLuint i;

      emit(BRW_OPCODE_MOV, header1, 0u);

      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
         assert(!"finishme: psiz");
         src_reg psiz;

         header1.writemask = WRITEMASK_W;
         emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
         emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
      }

      for (i = 0; i < c->key.nr_userclip; i++) {
         vec4_instruction *inst;

         /* dot(position, user plane) into the null register: only the
          * flag result of the compare is needed.
          */
         inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
                     pos, src_reg(c->userplane[i]));
inst->conditional_mod = BRW_CONDITIONAL_L; 1548 1549 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); 1550 inst->predicate = BRW_PREDICATE_NORMAL; 1551 } 1552 1553 /* i965 clipping workaround: 1554 * 1) Test for -ve rhw 1555 * 2) If set, 1556 * set ndc = (0,0,0,0) 1557 * set ucp[6] = 1 1558 * 1559 * Later, clipping will detect ucp[6] and ensure the primitive is 1560 * clipped against all fixed planes. 1561 */ 1562 if (brw->has_negative_rhw_bug) { 1563#if 0 1564 /* FINISHME */ 1565 brw_CMP(p, 1566 vec8(brw_null_reg()), 1567 BRW_CONDITIONAL_L, 1568 brw_swizzle1(ndc, 3), 1569 brw_imm_f(0)); 1570 1571 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); 1572 brw_MOV(p, ndc, brw_imm_f(0)); 1573 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1574#endif 1575 } 1576 1577 header1.writemask = WRITEMASK_XYZW; 1578 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); 1579 } else { 1580 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), 1581 BRW_REGISTER_TYPE_UD), 0u); 1582 } 1583 1584 if (intel->gen == 5) { 1585 /* There are 20 DWs (D0-D19) in VUE header on Ironlake: 1586 * dword 0-3 (m1) of the header is indices, point width, clip flags. 1587 * dword 4-7 (m2) is the ndc position (set above) 1588 * dword 8-11 (m3) of the vertex header is the 4D space position 1589 * dword 12-19 (m4,m5) of the vertex header is the user clip distance. 1590 * m6 is a pad so that the vertex element data is aligned 1591 * m7 is the first vertex data we fill. 1592 */ 1593 current_annotation = "NDC"; 1594 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1595 1596 current_annotation = "gl_Position"; 1597 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1598 1599 /* user clip distance. */ 1600 header_mrf += 2; 1601 1602 /* Pad so that vertex element data is aligned. */ 1603 header_mrf++; 1604 } else { 1605 /* There are 8 dwords in VUE header pre-Ironlake: 1606 * dword 0-3 (m1) is indices, point width, clip flags. 
1607 * dword 4-7 (m2) is ndc position (set above) 1608 * 1609 * dword 8-11 (m3) is the first vertex data. 1610 */ 1611 current_annotation = "NDC"; 1612 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1613 1614 current_annotation = "gl_Position"; 1615 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1616 } 1617 1618 return header_mrf; 1619} 1620 1621int 1622vec4_visitor::emit_vue_header_gen6(int header_mrf) 1623{ 1624 struct brw_reg reg; 1625 1626 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: 1627 * dword 0-3 (m2) of the header is indices, point width, clip flags. 1628 * dword 4-7 (m3) is the 4D space position 1629 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if 1630 * enabled. 1631 * 1632 * m4 or 6 is the first vertex element data we fill. 1633 */ 1634 1635 current_annotation = "indices, point width, clip flags"; 1636 reg = brw_message_reg(header_mrf++); 1637 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); 1638 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1639 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), 1640 src_reg(output_reg[VERT_RESULT_PSIZ])); 1641 } 1642 1643 current_annotation = "gl_Position"; 1644 emit(BRW_OPCODE_MOV, 1645 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); 1646 1647 current_annotation = "user clip distances"; 1648 if (c->key.nr_userclip) { 1649 for (int i = 0; i < c->key.nr_userclip; i++) { 1650 struct brw_reg m; 1651 if (i < 4) 1652 m = brw_message_reg(header_mrf); 1653 else 1654 m = brw_message_reg(header_mrf + 1); 1655 1656 emit(BRW_OPCODE_DP4, 1657 dst_reg(brw_writemask(m, 1 << (i & 3))), 1658 src_reg(c->userplane[i])); 1659 } 1660 header_mrf += 2; 1661 } 1662 1663 current_annotation = NULL; 1664 1665 return header_mrf; 1666} 1667 1668static int 1669align_interleaved_urb_mlen(struct brw_context *brw, int mlen) 1670{ 1671 struct intel_context *intel = &brw->intel; 1672 1673 if (intel->gen >= 6) { 
1674 /* URB data written (does not include the message header reg) must 1675 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, 1676 * section 5.4.3.2.2: URB_INTERLEAVED. 1677 * 1678 * URB entries are allocated on a multiple of 1024 bits, so an 1679 * extra 128 bits written here to make the end align to 256 is 1680 * no problem. 1681 */ 1682 if ((mlen % 2) != 1) 1683 mlen++; 1684 } 1685 1686 return mlen; 1687} 1688 1689/** 1690 * Generates the VUE payload plus the 1 or 2 URB write instructions to 1691 * complete the VS thread. 1692 * 1693 * The VUE layout is documented in Volume 2a. 1694 */ 1695void 1696vec4_visitor::emit_urb_writes() 1697{ 1698 int base_mrf = 1; 1699 int mrf = base_mrf; 1700 int urb_entry_size; 1701 1702 /* FINISHME: edgeflag */ 1703 1704 /* First mrf is the g0-based message header containing URB handles and such, 1705 * which is implied in VS_OPCODE_URB_WRITE. 1706 */ 1707 mrf++; 1708 1709 if (intel->gen >= 6) { 1710 mrf = emit_vue_header_gen6(mrf); 1711 } else { 1712 mrf = emit_vue_header_gen4(mrf); 1713 } 1714 1715 int attr; 1716 for (attr = 0; attr < VERT_RESULT_MAX; attr++) { 1717 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1718 continue; 1719 1720 /* This is set up in the VUE header. */ 1721 if (attr == VERT_RESULT_HPOS) 1722 continue; 1723 1724 /* This is loaded into the VUE header, and thus doesn't occupy 1725 * an attribute slot. 1726 */ 1727 if (attr == VERT_RESULT_PSIZ) 1728 continue; 1729 1730 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); 1731 1732 /* If this is MRF 15, we can't fit anything more into this URB 1733 * WRITE. Note that base_mrf of 1 means that MRF 15 is an 1734 * even-numbered amount of URB write data, which will meet 1735 * gen6's requirements for length alignment. 
1736 */ 1737 if (mrf == 15) 1738 break; 1739 } 1740 1741 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); 1742 inst->base_mrf = base_mrf; 1743 inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); 1744 inst->eot = true; 1745 1746 urb_entry_size = mrf - base_mrf; 1747 1748 for (; attr < VERT_RESULT_MAX; attr++) { 1749 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1750 continue; 1751 fail("Second URB write not supported.\n"); 1752 break; 1753 } 1754 1755 if (intel->gen == 6) 1756 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; 1757 else 1758 c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; 1759} 1760 1761vec4_visitor::vec4_visitor(struct brw_vs_compile *c, 1762 struct gl_shader_program *prog, 1763 struct brw_shader *shader) 1764{ 1765 this->c = c; 1766 this->p = &c->func; 1767 this->brw = p->brw; 1768 this->intel = &brw->intel; 1769 this->ctx = &intel->ctx; 1770 this->prog = prog; 1771 this->shader = shader; 1772 1773 this->mem_ctx = ralloc_context(NULL); 1774 this->failed = false; 1775 1776 this->base_ir = NULL; 1777 this->current_annotation = NULL; 1778 1779 this->c = c; 1780 this->vp = brw->vertex_program; /* FINISHME: change for precompile */ 1781 this->prog_data = &c->prog_data; 1782 1783 this->variable_ht = hash_table_ctor(0, 1784 hash_table_pointer_hash, 1785 hash_table_pointer_compare); 1786 1787 this->virtual_grf_sizes = NULL; 1788 this->virtual_grf_count = 0; 1789 this->virtual_grf_array_size = 0; 1790 1791 this->uniforms = 0; 1792 1793 this->variable_ht = hash_table_ctor(0, 1794 hash_table_pointer_hash, 1795 hash_table_pointer_compare); 1796} 1797 1798vec4_visitor::~vec4_visitor() 1799{ 1800 hash_table_dtor(this->variable_ht); 1801} 1802 1803 1804void 1805vec4_visitor::fail(const char *format, ...) 
1806{ 1807 va_list va; 1808 char *msg; 1809 1810 if (failed) 1811 return; 1812 1813 failed = true; 1814 1815 va_start(va, format); 1816 msg = ralloc_vasprintf(mem_ctx, format, va); 1817 va_end(va); 1818 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); 1819 1820 this->fail_msg = msg; 1821 1822 if (INTEL_DEBUG & DEBUG_VS) { 1823 fprintf(stderr, "%s", msg); 1824 } 1825} 1826 1827} /* namespace brw */ 1828