brw_vec4_visitor.cpp revision 7bf70c29adf175f51d0347d0187aecc0e9bbbcb8
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4.h"
extern "C" {
#include "main/macros.h"
#include "program/prog_parameter.h"
}

namespace brw {

src_reg::src_reg(dst_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->reladdr = reg.reladdr;
   this->fixed_hw_reg = reg.fixed_hw_reg;

   int swizzles[4];
   int next_chan = 0;
   int last = 0;

   for (int i = 0; i < 4; i++) {
      if (!(reg.writemask & (1 << i)))
         continue;

      swizzles[next_chan++] = last = i;
   }

   for (; next_chan < 4; next_chan++) {
      swizzles[next_chan] = last;
   }

   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                                swizzles[2], swizzles[3]);
}

dst_reg::dst_reg(src_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->writemask = WRITEMASK_XYZW;
   this->reladdr = reg.reladdr;
   this->fixed_hw_reg = reg.fixed_hw_reg;
}

vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst,
                   src_reg src0, src_reg src1, src_reg src2)
{
   vec4_instruction *inst = new(mem_ctx) vec4_instruction();

   inst->opcode = opcode;
   inst->dst = dst;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->ir = this->base_ir;
   inst->annotation = this->current_annotation;

   this->instructions.push_tail(inst);

   return inst;
}


vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(opcode, dst, src0, src1, src_reg());
}

vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
{
   assert(dst.writemask != 0);
   return emit(opcode, dst, src0, src_reg(), src_reg());
}

vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
}

void
vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
{
   static enum opcode dot_opcodes[] = {
      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
   };

   emit(dot_opcodes[elements - 2], dst, src0, src1);
}
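/* A worked example of the writemask-to-swizzle mapping in the
 * src_reg(dst_reg) constructor above: reading back a destination that
 * was written with only .x and .z enabled replicates the last written
 * channel into the unwritten slots, so
 *
 *    dst.writemask == WRITEMASK_XZ
 *
 * reads back as
 *
 *    src.swizzle == BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)
 *
 * i.e. the .xzzz swizzle.
 */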
void
vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
{
   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.
    */
   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, temp_src);

      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
   } else {
      emit(opcode, dst, temp_src);
   }
}

void
vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
{
   vec4_instruction *inst = emit(opcode, dst, src);
   inst->base_mrf = 1;
   inst->mlen = 1;
}

void
vec4_visitor::emit_math(enum opcode opcode, dst_reg dst, src_reg src)
{
   switch (opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      break;
   default:
      assert(!"not reached: bad math opcode");
      return;
   }

   if (intel->gen >= 6) {
      return emit_math1_gen6(opcode, dst, src);
   } else {
      return emit_math1_gen4(opcode, dst, src);
   }
}

void
vec4_visitor::emit_math2_gen6(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   src_reg expanded;

   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.  Move the sources to temporaries to make it
    * generally work.
    */

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
   src0 = expanded;

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
   src1 = expanded;

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, src0, src1);

      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
   } else {
      emit(opcode, dst, src0, src1);
   }
}

void
vec4_visitor::emit_math2_gen4(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   vec4_instruction *inst = emit(opcode, dst, src0, src1);
   inst->base_mrf = 1;
   inst->mlen = 2;
}

void
vec4_visitor::emit_math(enum opcode opcode,
                        dst_reg dst, src_reg src0, src_reg src1)
{
   assert(opcode == SHADER_OPCODE_POW);

   if (intel->gen >= 6) {
      return emit_math2_gen6(opcode, dst, src0, src1);
   } else {
      return emit_math2_gen4(opcode, dst, src0, src1);
   }
}

void
vec4_visitor::visit_instructions(const exec_list *list)
{
   foreach_list(node, list) {
      ir_instruction *ir = (ir_instruction *)node;

      base_ir = ir;
      ir->accept(this);
   }
}
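/* To make the gen6 workarounds above concrete: a partially-masked math
 * operation such as result.xy = exp2(src.wwww) ends up as three
 * instructions,
 *
 *    MOV tmp_src, src.wwww      ; resolve swizzle/abs/negate first
 *    MATH tmp_dst, tmp_src      ; align1 math, full writemask
 *    MOV result.xy, tmp_dst     ; apply the real writemask afterward
 *
 * while on gen4 the math goes out as a single send-from-MRF message.
 */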
static int
type_size(const struct glsl_type *type)
{
   unsigned int i;
   int size;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (type->is_matrix()) {
         return type->matrix_columns;
      } else {
         /* Regardless of size of vector, it gets a vec4.  This is bad
          * packing for things like floats, but otherwise arrays become a
          * mess.  Hopefully a later pass over the code can pack scalars
          * down if appropriate.
          */
         return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
       */
      return 1;
   default:
      assert(0);
      return 0;
   }
}

int
vec4_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_count) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
                                   virtual_grf_array_size);
   }
   virtual_grf_sizes[virtual_grf_count] = size;
   return virtual_grf_count++;
}

src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->swizzle = BRW_SWIZZLE_NOOP;
   } else {
      this->swizzle = swizzle_for_size(type->vector_elements);
   }

   this->type = brw_type_for_base_type(type);
}

dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->writemask = WRITEMASK_XYZW;
   } else {
      this->writemask = (1 << type->vector_elements) - 1;
   }

   this->type = brw_type_for_base_type(type);
}
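/* Example results of type_size() above, in units of vec4 slots:
 *
 *    float                        -> 1  (scalars still get a full vec4)
 *    vec3                         -> 1
 *    mat3                         -> 3  (one slot per column)
 *    float[4]                     -> 4
 *    struct { vec3 a; float b; }  -> 2
 */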
/* Our support for uniforms is piggy-backed on the struct
 * gl_vertex_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         int slot = this->uniforms * 4 + i;
         switch (type->base_type) {
         case GLSL_TYPE_FLOAT:
            c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
            break;
         case GLSL_TYPE_UINT:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
            break;
         case GLSL_TYPE_INT:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
            break;
         case GLSL_TYPE_BOOL:
            c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
            break;
         default:
            assert(!"not reached");
            c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
            break;
         }
         c->prog_data.param[slot] = &values[i];
      }

      for (unsigned int i = type->vector_elements; i < 4; i++) {
         c->prog_data.param_convert[this->uniforms * 4 + i] =
            PARAM_CONVERT_ZERO;
         c->prog_data.param[this->uniforms * 4 + i] = NULL;
      }

      this->uniform_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
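/* As an illustration of the layout setup_uniform_values() builds: a
 * mat2 uniform is treated as two vec2 columns, each occupying a
 * vec4-aligned slot with its unused channels zero-padded:
 *
 *    param[0], param[1] -> column 0 x, y;  param[2], param[3] -> ZERO
 *    param[4], param[5] -> column 1 x, y;  param[6], param[7] -> ZERO
 */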
/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
                                            (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);
         last_swiz = swiz;

         c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
         c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
         if (swiz <= last_swiz)
            this->uniform_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}

dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
   return (dst_reg *)hash_table_find(this->variable_ht, var);
}

void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
         } else {
            inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         break;

      default:
         assert(!"not reached");
         break;
      }
      return;
   }

   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
                                    this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}
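/* For example, emit_bool_to_cond_code() turns the condition of
 *
 *    if (a || b)
 *
 * into a flag-writing
 *
 *    OR.nz null, a, b
 *
 * so the IF that follows only needs to test the flag register instead
 * of materializing a boolean value in a GRF first.
 */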
/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;
      dst_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         return;

      case ir_binop_logic_xor:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_or:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_OR, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_logic_and:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(BRW_OPCODE_AND, temp, op[0], op[1]);
         inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_f2b:
         inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_unop_i2b:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_nequal:
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
         inst->conditional_mod =
            brw_conditional_for_comparison(expr->operation);
         return;

      case ir_binop_all_equal:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
         return;

      case ir_binop_any_nequal:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         return;

      case ir_unop_any:
         inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         inst = emit(BRW_OPCODE_IF);
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         return;

      default:
         assert(!"not reached");
         inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         return;
      }
      return;
   }

   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
                                 this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
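/* With the gen6 folding above, a comparison condition like
 *
 *    if (a < b)
 *
 * becomes a single
 *
 *    IF.l null, a, b
 *
 * rather than the gen4-style CMP into the flag register followed by a
 * predicated IF.
 */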
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      for (int i = 0; i < type_size(ir->type); i++) {
         output_reg[ir->location + i] = *reg;
         output_reg[ir->location + i].reg_offset = i;
         output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
      }
      break;

   case ir_var_auto:
   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }
      break;

   default:
      assert(!"not reached");
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}

void
vec4_visitor::visit(ir_loop *ir)
{
   dst_reg counter;

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from != NULL) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(BRW_OPCODE_MOV, counter, this->result);
      }
   }

   emit(BRW_OPCODE_DO);

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
                                    src_reg(counter), this->result);
      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);

      inst = emit(BRW_OPCODE_BREAK);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->body_instructions);


   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
   }

   emit(BRW_OPCODE_WHILE);
}

void
vec4_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}


void
vec4_visitor::visit(ir_function_signature *ir)
{
   assert(0);
   (void)ir;
}

void
vec4_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      visit_instructions(&sig->body);
   }
}

GLboolean
vec4_visitor::try_emit_sat(ir_expression *ir)
{
   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   src_reg src = this->result;

   this->result = src_reg(this, ir->type);
   vec4_instruction *inst;
   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
   inst->saturate = true;

   return true;
}
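/* try_emit_sat() above means that IR of the shape
 *
 *    min(max(x, 0.0), 1.0)
 *
 * (what ir->as_rvalue_to_saturate() recognizes) collapses into a single
 * MOV with the saturate modifier set, instead of a CMP/SEL pair for
 * each clamp.
 */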
void
vec4_visitor::emit_bool_comparison(unsigned int op,
                                   dst_reg dst, src_reg src0, src_reg src1)
{
   /* original gen4 does destination conversion before comparison. */
   if (intel->gen < 5)
      dst.type = src0.type;

   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
   inst->conditional_mod = brw_conditional_for_comparison(op);

   dst.type = BRW_REGISTER_TYPE_D;
   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
}

void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   if (try_emit_sat(ir))
      return;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;

   case ir_unop_sign:
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;

   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
   case ir_unop_dFdy:
      assert(!"derivatives not valid in vertex shader");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_binop_add:
      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
      break;
   }

   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;

         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
      } else {
         dst_reg temp = result_dst;
         /* original gen4 does implicit conversion before comparison. */
         if (intel->gen < 5)
            temp.type = op[0].type;

         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;

         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      } else {
         dst_reg temp = result_dst;
         /* original gen4 does implicit conversion before comparison. */
         if (intel->gen < 5)
            temp.type = op[0].type;

         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;

   case ir_unop_any:
      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;

      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));

      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      break;

   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
      break;
   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      emit(BRW_OPCODE_MOV, result_dst, op[0]);
      break;
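   /* The ALIGN16_ALL4H/ANY4H predicates in the ir_binop_all_equal,
    * ir_binop_any_nequal, and ir_unop_any cases above test the flag bits
    * the preceding CMP wrote for all four channels at once: ALL4H passes
    * only if every channel compared true, ANY4H if at least one did.
    * Operands narrower than vec4 still work because their swizzles
    * replicate the last real channel, so the padding channels compare
    * equal as well.
    */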
   case ir_unop_f2b:
   case ir_unop_i2b: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
         temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
      break;
   }

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
      break;

   case ir_binop_min:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}
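/* Note that the swizzle visitor below composes swizzles instead of
 * emitting a MOV: given a value already carrying the .yzwx swizzle,
 * selecting .xz from it makes BRW_GET_SWZ pick channels y and w out of
 * the existing swizzle, yielding .ywww (last channel replicated) with
 * no instruction generated.
 */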
void
vec4_visitor::visit(ir_swizzle *ir)
{
   src_reg src;
   int i = 0;
   int swizzle[4];

   /* Note that this is only swizzles in expressions, not those on the left
    * hand side of an assignment, which do write masking.  See ir_assignment
    * for that.
    */

   ir->val->accept(this);
   src = this->result;
   assert(src.file != BAD_FILE);

   for (i = 0; i < ir->type->vector_elements; i++) {
      switch (i) {
      case 0:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
         break;
      case 1:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
         break;
      case 2:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
         break;
      case 3:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
         break;
      }
   }
   for (; i < 4; i++) {
      /* Replicate the last channel out. */
      swizzle[i] = swizzle[ir->type->vector_elements - 1];
   }

   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}

void
vec4_visitor::visit(ir_dereference_variable *ir)
{
   const struct glsl_type *type = ir->type;
   dst_reg *reg = variable_storage(ir->var);

   if (!reg) {
      fail("Failed to find variable storage for %s\n", ir->var->name);
      this->result = src_reg(brw_null_reg());
      return;
   }

   this->result = src_reg(*reg);

   if (type->is_scalar() || type->is_vector() || type->is_matrix())
      this->result.swizzle = swizzle_for_size(type->vector_elements);
}

void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         index_reg = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_MUL, dst_reg(index_reg),
              this->result, src_reg(element_size));
      }

      if (src.reladdr) {
         src_reg temp = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);

         index_reg = temp;
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}

void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;
   this->result.type = brw_type_for_base_type(ir->type);

   this->result.reg_offset += offset;
}
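/* For a variable-indexed dereference like a[i] above, the offset is
 * computed at runtime:
 *
 *    MUL index, i, element_size      ; element_size in vec4 slots
 *    ADD index, base_reladdr, index  ; only if the base was itself indexed
 *
 * and the result is stashed in src.reladdr, to be consumed later by
 * move_grf_array_access_to_scratch().
 */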
/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series of conditional
    * moves before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return dst_reg(v->result);
}

void
vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
                              const struct glsl_type *type, bool predicated)
{
   if (type->base_type == GLSL_TYPE_STRUCT) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.structure[i].type, predicated);
      }
      return;
   }

   if (type->is_array()) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.array, predicated);
      }
      return;
   }

   if (type->is_matrix()) {
      const struct glsl_type *vec_type;

      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                         type->vector_elements, 1);

      for (int i = 0; i < type->matrix_columns; i++) {
         emit_block_move(dst, src, vec_type, predicated);
      }
      return;
   }

   assert(type->is_scalar() || type->is_vector());

   dst->type = brw_type_for_base_type(type);
   src->type = dst->type;

   dst->writemask = (1 << type->vector_elements) - 1;

   /* Do we need to worry about swizzling a swizzle? */
   assert(src->swizzle == BRW_SWIZZLE_NOOP);
   src->swizzle = swizzle_for_size(type->vector_elements);

   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
   if (predicated)
      inst->predicate = BRW_PREDICATE_NORMAL;

   dst->reg_offset++;
   src->reg_offset++;
}
/* If the RHS processing resulted in an instruction generating a
 * temporary value, and it would be easy to rewrite the instruction to
 * generate its result right into the LHS instead, do so.  This ends
 * up reliably removing instructions where it can be tricky to do so
 * later without real UD chain information.
 */
bool
vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
                                     dst_reg dst,
                                     src_reg src,
                                     vec4_instruction *pre_rhs_inst,
                                     vec4_instruction *last_rhs_inst)
{
   /* This could be supported, but it would take more smarts. */
   if (ir->condition)
      return false;

   if (pre_rhs_inst == last_rhs_inst)
      return false; /* No instructions generated to work with. */

   /* Make sure the last instruction generated our source reg. */
   if (src.file != GRF ||
       src.file != last_rhs_inst->dst.file ||
       src.reg != last_rhs_inst->dst.reg ||
       src.reg_offset != last_rhs_inst->dst.reg_offset ||
       src.reladdr ||
       src.abs ||
       src.negate ||
       last_rhs_inst->predicate != BRW_PREDICATE_NONE)
      return false;

   /* Check that the last instruction fully initialized the channels
    * we want to use, in the order we want to use them.  We could
    * potentially reswizzle the operands of many instructions so that
    * we could handle out of order channels, but don't yet.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         if (!(last_rhs_inst->dst.writemask & (1 << i)))
            return false;

         if (BRW_GET_SWZ(src.swizzle, i) != i)
            return false;
      }
   }

   /* Success!  Rewrite the instruction. */
   last_rhs_inst->dst.file = dst.file;
   last_rhs_inst->dst.reg = dst.reg;
   last_rhs_inst->dst.reg_offset = dst.reg_offset;
   last_rhs_inst->dst.reladdr = dst.reladdr;
   last_rhs_inst->dst.writemask &= dst.writemask;

   return true;
}

void
vec4_visitor::visit(ir_assignment *ir)
{
   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      ir->rhs->accept(this);
      src_reg src = this->result;

      if (ir->condition) {
         emit_bool_to_cond_code(ir->condition);
      }

      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;

   vec4_instruction *pre_rhs_inst, *last_rhs_inst;
   pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();

   ir->rhs->accept(this);

   last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();

   src_reg src = this->result;

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
          ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
         break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
         swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
         swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                              swizzles[2], swizzles[3]);

   if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
      return;
   }

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
         inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}
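/* The effect of the try_rewrite_rhs_to_dst() call in the assignment
 * handler is to turn, e.g.,
 *
 *    ADD tmp, a, b
 *    MOV dst.xy, tmp.xy
 *
 * into the single
 *
 *    ADD dst.xy, a, b
 *
 * whenever the temporary is unswizzled, unpredicated, and fully covers
 * the channels being written.
 */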
void
vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
{
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      foreach_list(node, &ir->components) {
         ir_constant *field_value = (ir_constant *)node;

         emit_constant_values(dst, field_value);
      }
      return;
   }

   if (ir->type->is_array()) {
      for (unsigned int i = 0; i < ir->type->length; i++) {
         emit_constant_values(dst, ir->array_elements[i]);
      }
      return;
   }

   if (ir->type->is_matrix()) {
      for (int i = 0; i < ir->type->matrix_columns; i++) {
         for (int j = 0; j < ir->type->vector_elements; j++) {
            dst->writemask = 1 << j;
            dst->type = BRW_REGISTER_TYPE_F;

            emit(BRW_OPCODE_MOV, *dst,
                 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
         }
         dst->reg_offset++;
      }
      return;
   }

   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst->writemask = 1 << i;
      dst->type = brw_type_for_base_type(ir->type);

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
         break;
      case GLSL_TYPE_INT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
         break;
      case GLSL_TYPE_UINT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
         break;
      case GLSL_TYPE_BOOL:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
         break;
      }
   }
   dst->reg_offset++;
}

void
vec4_visitor::visit(ir_constant *ir)
{
   dst_reg dst = dst_reg(this, ir->type);
   this->result = src_reg(dst);

   emit_constant_values(&dst, ir);
}

void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_texture *ir)
{
   /* FINISHME: Implement vertex texturing.
    *
    * With 0 vertex samplers available, the linker will reject
    * programs that do vertex texturing, but after our visitor has
    * run.
    */
}

void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);
      vec4_instruction *inst = emit(BRW_OPCODE_IF);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->then_instructions);

   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}

int
vec4_visitor::emit_vue_header_gen4(int header_mrf)
{
   /* Get the position */
   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);

   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);

   current_annotation = "NDC";
   dst_reg ndc_w = ndc;
   ndc_w.writemask = WRITEMASK_W;
   src_reg pos_w = pos;
   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);

   dst_reg ndc_xyz = ndc;
   ndc_xyz.writemask = WRITEMASK_XYZ;

   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));

   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
       c->key.nr_userclip || brw->has_negative_rhw_bug) {
      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
      GLuint i;

      emit(BRW_OPCODE_MOV, header1, 0u);

      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
         assert(!"finishme: psiz");
         src_reg psiz;

         header1.writemask = WRITEMASK_W;
         emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
         emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
      }

      for (i = 0; i < c->key.nr_userclip; i++) {
         vec4_instruction *inst;

         inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
                     pos, src_reg(c->userplane[i]));
         inst->conditional_mod = BRW_CONDITIONAL_L;

         inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
         inst->predicate = BRW_PREDICATE_NORMAL;
      }
      /* i965 clipping workaround:
       * 1) Test for -ve rhw
       * 2) If set,
       *      set ndc = (0,0,0,0)
       *      set ucp[6] = 1
       *
       * Later, clipping will detect ucp[6] and ensure the primitive is
       * clipped against all fixed planes.
       */
      if (brw->has_negative_rhw_bug) {
#if 0
         /* FINISHME */
         brw_CMP(p,
                 vec8(brw_null_reg()),
                 BRW_CONDITIONAL_L,
                 brw_swizzle1(ndc, 3),
                 brw_imm_f(0));

         brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
         brw_MOV(p, ndc, brw_imm_f(0));
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
#endif
      }

      header1.writemask = WRITEMASK_XYZW;
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
   } else {
      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
                                  BRW_REGISTER_TYPE_UD), 0u);
   }

   if (intel->gen == 5) {
      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
       * dword 0-3 (m1) of the header is indices, point width, clip flags.
       * dword 4-7 (m2) is the ndc position (set above)
       * dword 8-11 (m3) of the vertex header is the 4D space position
       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
       * m6 is a pad so that the vertex element data is aligned
       * m7 is the first vertex data we fill.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);

      /* user clip distance. */
      header_mrf += 2;

      /* Pad so that vertex element data is aligned. */
      header_mrf++;
   } else {
      /* There are 8 dwords in VUE header pre-Ironlake:
       * dword 0-3 (m1) is indices, point width, clip flags.
       * dword 4-7 (m2) is ndc position (set above)
       *
       * dword 8-11 (m3) is the first vertex data.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
   }

   return header_mrf;
}
1736 */ 1737 1738 current_annotation = "indices, point width, clip flags"; 1739 reg = brw_message_reg(header_mrf++); 1740 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); 1741 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1742 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), 1743 src_reg(output_reg[VERT_RESULT_PSIZ])); 1744 } 1745 1746 current_annotation = "gl_Position"; 1747 emit(BRW_OPCODE_MOV, 1748 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); 1749 1750 current_annotation = "user clip distances"; 1751 if (c->key.nr_userclip) { 1752 for (int i = 0; i < c->key.nr_userclip; i++) { 1753 struct brw_reg m; 1754 if (i < 4) 1755 m = brw_message_reg(header_mrf); 1756 else 1757 m = brw_message_reg(header_mrf + 1); 1758 1759 emit(BRW_OPCODE_DP4, 1760 dst_reg(brw_writemask(m, 1 << (i & 3))), 1761 src_reg(c->userplane[i])); 1762 } 1763 header_mrf += 2; 1764 } 1765 1766 current_annotation = NULL; 1767 1768 return header_mrf; 1769} 1770 1771static int 1772align_interleaved_urb_mlen(struct brw_context *brw, int mlen) 1773{ 1774 struct intel_context *intel = &brw->intel; 1775 1776 if (intel->gen >= 6) { 1777 /* URB data written (does not include the message header reg) must 1778 * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, 1779 * section 5.4.3.2.2: URB_INTERLEAVED. 1780 * 1781 * URB entries are allocated on a multiple of 1024 bits, so an 1782 * extra 128 bits written here to make the end align to 256 is 1783 * no problem. 1784 */ 1785 if ((mlen % 2) != 1) 1786 mlen++; 1787 } 1788 1789 return mlen; 1790} 1791 1792/** 1793 * Generates the VUE payload plus the 1 or 2 URB write instructions to 1794 * complete the VS thread. 1795 * 1796 * The VUE layout is documented in Volume 2a. 1797 */ 1798void 1799vec4_visitor::emit_urb_writes() 1800{ 1801 /* MRF 0 is reserved for the debugger, so start with message header 1802 * in MRF 1. 1803 */ 1804 int base_mrf = 1; 1805 int mrf = base_mrf; 1806 int urb_entry_size; 1807 uint64_t outputs_remaining = c->prog_data.outputs_written; 1808 /* In the process of generating our URB write message contents, we 1809 * may need to unspill a register or load from an array. Those 1810 * reads would use MRFs 14-15. 1811 */ 1812 int max_usable_mrf = 13; 1813 1814 /* FINISHME: edgeflag */ 1815 1816 /* First mrf is the g0-based message header containing URB handles and such, 1817 * which is implied in VS_OPCODE_URB_WRITE. 1818 */ 1819 mrf++; 1820 1821 if (intel->gen >= 6) { 1822 mrf = emit_vue_header_gen6(mrf); 1823 } else { 1824 mrf = emit_vue_header_gen4(mrf); 1825 } 1826 1827 /* Set up the VUE data for the first URB write */ 1828 int attr; 1829 for (attr = 0; attr < VERT_RESULT_MAX; attr++) { 1830 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) 1831 continue; 1832 1833 outputs_remaining &= ~BITFIELD64_BIT(attr); 1834 1835 /* This is set up in the VUE header. */ 1836 if (attr == VERT_RESULT_HPOS) 1837 continue; 1838 1839 /* This is loaded into the VUE header, and thus doesn't occupy 1840 * an attribute slot. 1841 */ 1842 if (attr == VERT_RESULT_PSIZ) 1843 continue; 1844 1845 vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), 1846 src_reg(output_reg[attr])); 1847 1848 if ((attr == VERT_RESULT_COL0 || 1849 attr == VERT_RESULT_COL1 || 1850 attr == VERT_RESULT_BFC0 || 1851 attr == VERT_RESULT_BFC1) && 1852 c->key.clamp_vertex_color) { 1853 inst->saturate = true; 1854 } 1855 1856 /* If this was MRF 15, we can't fit anything more into this URB 1857 * WRITE. 
/**
 * Generates the VUE payload plus the 1 or 2 URB write instructions to
 * complete the VS thread.
 *
 * The VUE layout is documented in Volume 2a.
 */
void
vec4_visitor::emit_urb_writes()
{
   /* MRF 0 is reserved for the debugger, so start with message header
    * in MRF 1.
    */
   int base_mrf = 1;
   int mrf = base_mrf;
   int urb_entry_size;
   uint64_t outputs_remaining = c->prog_data.outputs_written;
   /* In the process of generating our URB write message contents, we
    * may need to unspill a register or load from an array.  Those
    * reads would use MRFs 14-15.
    */
   int max_usable_mrf = 13;

   /* FINISHME: edgeflag */

   /* First mrf is the g0-based message header containing URB handles and such,
    * which is implied in VS_OPCODE_URB_WRITE.
    */
   mrf++;

   if (intel->gen >= 6) {
      mrf = emit_vue_header_gen6(mrf);
   } else {
      mrf = emit_vue_header_gen4(mrf);
   }

   /* Set up the VUE data for the first URB write */
   int attr;
   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
         continue;

      outputs_remaining &= ~BITFIELD64_BIT(attr);

      /* This is set up in the VUE header. */
      if (attr == VERT_RESULT_HPOS)
         continue;

      /* This is loaded into the VUE header, and thus doesn't occupy
       * an attribute slot.
       */
      if (attr == VERT_RESULT_PSIZ)
         continue;

      vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
                                    src_reg(output_reg[attr]));

      if ((attr == VERT_RESULT_COL0 ||
           attr == VERT_RESULT_COL1 ||
           attr == VERT_RESULT_BFC0 ||
           attr == VERT_RESULT_BFC1) &&
          c->key.clamp_vertex_color) {
         inst->saturate = true;
      }

      /* If this was MRF 15, we can't fit anything more into this URB
       * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
       * even-numbered amount of URB write data, which will meet
       * gen6's requirements for length alignment.
       */
      if (mrf > max_usable_mrf) {
         attr++;
         break;
      }
   }

   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
   inst->base_mrf = base_mrf;
   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
   inst->eot = !outputs_remaining;

   urb_entry_size = mrf - base_mrf;

   /* Optional second URB write */
   if (outputs_remaining) {
      mrf = base_mrf + 1;

      for (; attr < VERT_RESULT_MAX; attr++) {
         if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
            continue;

         assert(mrf < max_usable_mrf);

         emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
      }

      inst = emit(VS_OPCODE_URB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
      inst->eot = true;
      /* URB destination offset.  In the previous write, we got MRFs
       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
       * URB row increments, and each of our MRFs is half of one of
       * those, since we're doing interleaved writes.
       */
      inst->offset = (max_usable_mrf - base_mrf) / 2;

      urb_entry_size += mrf - base_mrf;
   }

   if (intel->gen == 6)
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
   else
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
}

src_reg
vec4_visitor::get_scratch_offset(vec4_instruction *inst,
                                 src_reg *reladdr, int reg_offset)
{
   /* Because we store the values to scratch interleaved like our
    * vertex data, we need to scale the vec4 index by 2.
    */
   int message_header_scale = 2;

   /* Pre-gen6, the message header uses byte offsets instead of vec4
    * (16-byte) offset units.
    */
   if (intel->gen < 6)
      message_header_scale *= 16;

   if (reladdr) {
      src_reg index = src_reg(this, glsl_type::int_type);

      vec4_instruction *add = emit(BRW_OPCODE_ADD,
                                   dst_reg(index),
                                   *reladdr,
                                   src_reg(reg_offset));
      /* Move our new instruction from the tail to its correct place. */
      add->remove();
      inst->insert_before(add);

      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
                                   index, src_reg(message_header_scale));
      mul->remove();
      inst->insert_before(mul);

      return index;
   } else {
      return src_reg(reg_offset * message_header_scale);
   }
}

/**
 * Emits an instruction before @inst to load the value named by @orig_src
 * from scratch space at @base_offset to @temp.
 */
void
vec4_visitor::emit_scratch_read(vec4_instruction *inst,
                                dst_reg temp, src_reg orig_src,
                                int base_offset)
{
   int reg_offset = base_offset + orig_src.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);

   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
                                              temp, index);

   scratch_read_inst->base_mrf = 14;
   scratch_read_inst->mlen = 1;
   /* Move our instruction from the tail to its correct place. */
   scratch_read_inst->remove();
   inst->insert_before(scratch_read_inst);
}
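/* Scratch addressing scale, per get_scratch_offset() above: a vec4 at
 * reg_offset 3 becomes message header offset 3 * 2 = 6 on gen6 (vec4
 * units, doubled because scratch is interleaved like vertex data) and
 * 3 * 2 * 16 = 96 on earlier parts, whose headers take byte offsets.
 */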
1969 */ 1970void 1971vec4_visitor::emit_scratch_write(vec4_instruction *inst, 1972 src_reg temp, dst_reg orig_dst, 1973 int base_offset) 1974{ 1975 int reg_offset = base_offset + orig_dst.reg_offset; 1976 src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset); 1977 1978 dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), 1979 orig_dst.writemask)); 1980 vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE, 1981 dst, temp, index); 1982 scratch_write_inst->base_mrf = 13; 1983 scratch_write_inst->mlen = 2; 1984 scratch_write_inst->predicate = inst->predicate; 1985 /* Move our instruction from the tail to its correct place. */ 1986 scratch_write_inst->remove(); 1987 inst->insert_after(scratch_write_inst); 1988} 1989 1990/** 1991 * We can't generally support array access in GRF space, because a 1992 * single instruction's destination can only span 2 contiguous 1993 * registers. So, we send all GRF arrays that get variable index 1994 * access to scratch space. 1995 */ 1996void 1997vec4_visitor::move_grf_array_access_to_scratch() 1998{ 1999 int scratch_loc[this->virtual_grf_count]; 2000 2001 for (int i = 0; i < this->virtual_grf_count; i++) { 2002 scratch_loc[i] = -1; 2003 } 2004 2005 /* First, calculate the set of virtual GRFs that need to be punted 2006 * to scratch due to having any array access on them, and where in 2007 * scratch. 2008 */ 2009 foreach_list(node, &this->instructions) { 2010 vec4_instruction *inst = (vec4_instruction *)node; 2011 2012 if (inst->dst.file == GRF && inst->dst.reladdr && 2013 scratch_loc[inst->dst.reg] == -1) { 2014 scratch_loc[inst->dst.reg] = c->last_scratch; 2015 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4; 2016 } 2017 2018 for (int i = 0 ; i < 3; i++) { 2019 src_reg *src = &inst->src[i]; 2020 2021 if (src->file == GRF && src->reladdr && 2022 scratch_loc[src->reg] == -1) { 2023 scratch_loc[src->reg] = c->last_scratch; 2024 c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4; 2025 } 2026 } 2027 } 2028 2029 /* Now, for anything that will be accessed through scratch, rewrite 2030 * it to load/store. Note that this is a _safe list walk, because 2031 * we may generate a new scratch_write instruction after the one 2032 * we're processing. 2033 */ 2034 foreach_list_safe(node, &this->instructions) { 2035 vec4_instruction *inst = (vec4_instruction *)node; 2036 2037 /* Set up the annotation tracking for new generated instructions. 
   /* Now, for anything that will be accessed through scratch, rewrite
    * it to load/store.  Note that this is a _safe list walk, because
    * we may generate a new scratch_write instruction after the one
    * we're processing.
    */
   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      /* Set up the annotation tracking for new generated instructions. */
      base_ir = inst->ir;
      current_annotation = inst->annotation;

      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
         src_reg temp = src_reg(this, glsl_type::vec4_type);

         emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);

         inst->dst.file = temp.file;
         inst->dst.reg = temp.reg;
         inst->dst.reg_offset = temp.reg_offset;
         inst->dst.reladdr = NULL;
      }

      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
            continue;

         dst_reg temp = dst_reg(this, glsl_type::vec4_type);

         emit_scratch_read(inst, temp, inst->src[i],
                           scratch_loc[inst->src[i].reg]);

         inst->src[i].file = temp.file;
         inst->src[i].reg = temp.reg;
         inst->src[i].reg_offset = temp.reg_offset;
         inst->src[i].reladdr = NULL;
      }
   }
}


vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
                           struct gl_shader_program *prog,
                           struct brw_shader *shader)
{
   this->c = c;
   this->p = &c->func;
   this->brw = p->brw;
   this->intel = &brw->intel;
   this->ctx = &intel->ctx;
   this->prog = prog;
   this->shader = shader;

   this->mem_ctx = ralloc_context(NULL);
   this->failed = false;

   this->base_ir = NULL;
   this->current_annotation = NULL;

   this->vp = prog->VertexProgram;
   this->prog_data = &c->prog_data;

   this->variable_ht = hash_table_ctor(0,
                                       hash_table_pointer_hash,
                                       hash_table_pointer_compare);

   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_array_size = 0;

   this->uniforms = 0;
}

vec4_visitor::~vec4_visitor()
{
   ralloc_free(this->mem_ctx);
   hash_table_dtor(this->variable_ht);
}


void
vec4_visitor::fail(const char *format, ...)
{
   va_list va;
   char *msg;

   if (failed)
      return;

   failed = true;

   va_start(va, format);
   msg = ralloc_vasprintf(mem_ctx, format, va);
   va_end(va);
   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);

   this->fail_msg = msg;

   if (INTEL_DEBUG & DEBUG_VS) {
      fprintf(stderr, "%s", msg);
   }
}

} /* namespace brw */