brw_vec4_visitor.cpp revision aed5e353e95f47773864c6e61c506b9ddad0e2e9
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
22 */ 23 24#include "brw_vec4.h" 25extern "C" { 26#include "main/macros.h" 27#include "program/prog_parameter.h" 28} 29 30namespace brw { 31 32src_reg::src_reg(dst_reg reg) 33{ 34 init(); 35 36 this->file = reg.file; 37 this->reg = reg.reg; 38 this->reg_offset = reg.reg_offset; 39 this->type = reg.type; 40 this->reladdr = reg.reladdr; 41 42 int swizzles[4]; 43 int next_chan = 0; 44 int last = 0; 45 46 for (int i = 0; i < 4; i++) { 47 if (!(reg.writemask & (1 << i))) 48 continue; 49 50 swizzles[next_chan++] = last = i; 51 } 52 53 for (; next_chan < 4; next_chan++) { 54 swizzles[next_chan] = last; 55 } 56 57 this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 58 swizzles[2], swizzles[3]); 59} 60 61dst_reg::dst_reg(src_reg reg) 62{ 63 init(); 64 65 this->file = reg.file; 66 this->reg = reg.reg; 67 this->reg_offset = reg.reg_offset; 68 this->type = reg.type; 69 this->writemask = WRITEMASK_XYZW; 70 this->reladdr = reg.reladdr; 71} 72 73vec4_instruction * 74vec4_visitor::emit(enum opcode opcode, dst_reg dst, 75 src_reg src0, src_reg src1, src_reg src2) 76{ 77 vec4_instruction *inst = new(mem_ctx) vec4_instruction(); 78 79 inst->opcode = opcode; 80 inst->dst = dst; 81 inst->src[0] = src0; 82 inst->src[1] = src1; 83 inst->src[2] = src2; 84 inst->ir = this->base_ir; 85 inst->annotation = this->current_annotation; 86 87 this->instructions.push_tail(inst); 88 89 return inst; 90} 91 92 93vec4_instruction * 94vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) 95{ 96 return emit(opcode, dst, src0, src1, src_reg()); 97} 98 99vec4_instruction * 100vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) 101{ 102 assert(dst.writemask != 0); 103 return emit(opcode, dst, src0, src_reg(), src_reg()); 104} 105 106vec4_instruction * 107vec4_visitor::emit(enum opcode opcode) 108{ 109 return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); 110} 111 112void 113vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) 
114{ 115 static enum opcode dot_opcodes[] = { 116 BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 117 }; 118 119 emit(dot_opcodes[elements - 2], dst, src0, src1); 120} 121 122void 123vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) 124{ 125 /* The gen6 math instruction ignores the source modifiers -- 126 * swizzle, abs, negate, and at least some parts of the register 127 * region description. 128 */ 129 src_reg temp_src = src_reg(this, glsl_type::vec4_type); 130 emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); 131 132 if (dst.writemask != WRITEMASK_XYZW) { 133 /* The gen6 math instruction must be align1, so we can't do 134 * writemasks. 135 */ 136 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); 137 138 emit(opcode, temp_dst, temp_src); 139 140 emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); 141 } else { 142 emit(opcode, dst, temp_src); 143 } 144} 145 146void 147vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) 148{ 149 vec4_instruction *inst = emit(opcode, dst, src); 150 inst->base_mrf = 1; 151 inst->mlen = 1; 152} 153 154void 155vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) 156{ 157 switch (opcode) { 158 case SHADER_OPCODE_RCP: 159 case SHADER_OPCODE_RSQ: 160 case SHADER_OPCODE_SQRT: 161 case SHADER_OPCODE_EXP2: 162 case SHADER_OPCODE_LOG2: 163 case SHADER_OPCODE_SIN: 164 case SHADER_OPCODE_COS: 165 break; 166 default: 167 assert(!"not reached: bad math opcode"); 168 return; 169 } 170 171 if (intel->gen >= 6) { 172 return emit_math1_gen6(opcode, dst, src); 173 } else { 174 return emit_math1_gen4(opcode, dst, src); 175 } 176} 177 178void 179vec4_visitor::emit_math2_gen6(enum opcode opcode, 180 dst_reg dst, src_reg src0, src_reg src1) 181{ 182 src_reg expanded; 183 184 /* The gen6 math instruction ignores the source modifiers -- 185 * swizzle, abs, negate, and at least some parts of the register 186 * region description. Move the sources to temporaries to make it 187 * generally work. 
188 */ 189 190 expanded = src_reg(this, glsl_type::vec4_type); 191 emit(BRW_OPCODE_MOV, dst_reg(expanded), src0); 192 src0 = expanded; 193 194 expanded = src_reg(this, glsl_type::vec4_type); 195 emit(BRW_OPCODE_MOV, dst_reg(expanded), src1); 196 src1 = expanded; 197 198 if (dst.writemask != WRITEMASK_XYZW) { 199 /* The gen6 math instruction must be align1, so we can't do 200 * writemasks. 201 */ 202 dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); 203 204 emit(opcode, temp_dst, src0, src1); 205 206 emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); 207 } else { 208 emit(opcode, dst, src0, src1); 209 } 210} 211 212void 213vec4_visitor::emit_math2_gen4(enum opcode opcode, 214 dst_reg dst, src_reg src0, src_reg src1) 215{ 216 vec4_instruction *inst = emit(opcode, dst, src0, src1); 217 inst->base_mrf = 1; 218 inst->mlen = 2; 219} 220 221void 222vec4_visitor::emit_math(enum opcode opcode, 223 dst_reg dst, src_reg src0, src_reg src1) 224{ 225 assert(opcode == SHADER_OPCODE_POW); 226 227 if (intel->gen >= 6) { 228 return emit_math2_gen6(opcode, dst, src0, src1); 229 } else { 230 return emit_math2_gen4(opcode, dst, src0, src1); 231 } 232} 233 234void 235vec4_visitor::visit_instructions(const exec_list *list) 236{ 237 foreach_iter(exec_list_iterator, iter, *list) { 238 ir_instruction *ir = (ir_instruction *)iter.get(); 239 240 base_ir = ir; 241 ir->accept(this); 242 } 243} 244 245 246static int 247type_size(const struct glsl_type *type) 248{ 249 unsigned int i; 250 int size; 251 252 switch (type->base_type) { 253 case GLSL_TYPE_UINT: 254 case GLSL_TYPE_INT: 255 case GLSL_TYPE_FLOAT: 256 case GLSL_TYPE_BOOL: 257 if (type->is_matrix()) { 258 return type->matrix_columns; 259 } else { 260 /* Regardless of size of vector, it gets a vec4. This is bad 261 * packing for things like floats, but otherwise arrays become a 262 * mess. Hopefully a later pass over the code can pack scalars 263 * down if appropriate. 
264 */ 265 return 1; 266 } 267 case GLSL_TYPE_ARRAY: 268 assert(type->length > 0); 269 return type_size(type->fields.array) * type->length; 270 case GLSL_TYPE_STRUCT: 271 size = 0; 272 for (i = 0; i < type->length; i++) { 273 size += type_size(type->fields.structure[i].type); 274 } 275 return size; 276 case GLSL_TYPE_SAMPLER: 277 /* Samplers take up one slot in UNIFORMS[], but they're baked in 278 * at link time. 279 */ 280 return 1; 281 default: 282 assert(0); 283 return 0; 284 } 285} 286 287int 288vec4_visitor::virtual_grf_alloc(int size) 289{ 290 if (virtual_grf_array_size <= virtual_grf_count) { 291 if (virtual_grf_array_size == 0) 292 virtual_grf_array_size = 16; 293 else 294 virtual_grf_array_size *= 2; 295 virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, 296 virtual_grf_array_size); 297 } 298 virtual_grf_sizes[virtual_grf_count] = size; 299 return virtual_grf_count++; 300} 301 302src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) 303{ 304 init(); 305 306 this->file = GRF; 307 this->reg = v->virtual_grf_alloc(type_size(type)); 308 309 if (type->is_array() || type->is_record()) { 310 this->swizzle = BRW_SWIZZLE_NOOP; 311 } else { 312 this->swizzle = swizzle_for_size(type->vector_elements); 313 } 314 315 this->type = brw_type_for_base_type(type); 316} 317 318dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) 319{ 320 init(); 321 322 this->file = GRF; 323 this->reg = v->virtual_grf_alloc(type_size(type)); 324 325 if (type->is_array() || type->is_record()) { 326 this->writemask = WRITEMASK_XYZW; 327 } else { 328 this->writemask = (1 << type->vector_elements) - 1; 329 } 330 331 this->type = brw_type_for_base_type(type); 332} 333 334/* Our support for uniforms is piggy-backed on the struct 335 * gl_fragment_program, because that's where the values actually 336 * get stored, rather than in some global gl_shader_program uniform 337 * store. 
338 */ 339int 340vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) 341{ 342 unsigned int offset = 0; 343 float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; 344 345 if (type->is_matrix()) { 346 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, 347 type->vector_elements, 348 1); 349 350 for (unsigned int i = 0; i < type->matrix_columns; i++) { 351 offset += setup_uniform_values(loc + offset, column); 352 } 353 354 return offset; 355 } 356 357 switch (type->base_type) { 358 case GLSL_TYPE_FLOAT: 359 case GLSL_TYPE_UINT: 360 case GLSL_TYPE_INT: 361 case GLSL_TYPE_BOOL: 362 for (unsigned int i = 0; i < type->vector_elements; i++) { 363 int slot = this->uniforms * 4 + i; 364 switch (type->base_type) { 365 case GLSL_TYPE_FLOAT: 366 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 367 break; 368 case GLSL_TYPE_UINT: 369 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; 370 break; 371 case GLSL_TYPE_INT: 372 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; 373 break; 374 case GLSL_TYPE_BOOL: 375 c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; 376 break; 377 default: 378 assert(!"not reached"); 379 c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; 380 break; 381 } 382 c->prog_data.param[slot] = &values[i]; 383 } 384 385 for (unsigned int i = type->vector_elements; i < 4; i++) { 386 c->prog_data.param_convert[this->uniforms * 4 + i] = 387 PARAM_CONVERT_ZERO; 388 c->prog_data.param[this->uniforms * 4 + i] = NULL; 389 } 390 391 this->uniform_size[this->uniforms] = type->vector_elements; 392 this->uniforms++; 393 394 return 1; 395 396 case GLSL_TYPE_STRUCT: 397 for (unsigned int i = 0; i < type->length; i++) { 398 offset += setup_uniform_values(loc + offset, 399 type->fields.structure[i].type); 400 } 401 return offset; 402 403 case GLSL_TYPE_ARRAY: 404 for (unsigned int i = 0; i < type->length; i++) { 405 offset += setup_uniform_values(loc + offset, type->fields.array); 406 } 407 return offset; 408 409 
case GLSL_TYPE_SAMPLER: 410 /* The sampler takes up a slot, but we don't use any values from it. */ 411 return 1; 412 413 default: 414 assert(!"not reached"); 415 return 0; 416 } 417} 418 419/* Our support for builtin uniforms is even scarier than non-builtin. 420 * It sits on top of the PROG_STATE_VAR parameters that are 421 * automatically updated from GL context state. 422 */ 423void 424vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) 425{ 426 const ir_state_slot *const slots = ir->state_slots; 427 assert(ir->state_slots != NULL); 428 429 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 430 /* This state reference has already been setup by ir_to_mesa, 431 * but we'll get the same index back here. We can reference 432 * ParameterValues directly, since unlike brw_fs.cpp, we never 433 * add new state references during compile. 434 */ 435 int index = _mesa_add_state_reference(this->vp->Base.Parameters, 436 (gl_state_index *)slots[i].tokens); 437 float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; 438 439 this->uniform_size[this->uniforms] = 0; 440 /* Add each of the unique swizzled channels of the element. 441 * This will end up matching the size of the glsl_type of this field. 
442 */ 443 int last_swiz = -1; 444 for (unsigned int j = 0; j < 4; j++) { 445 int swiz = GET_SWZ(slots[i].swizzle, j); 446 last_swiz = swiz; 447 448 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; 449 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; 450 if (swiz <= last_swiz) 451 this->uniform_size[this->uniforms]++; 452 } 453 this->uniforms++; 454 } 455} 456 457dst_reg * 458vec4_visitor::variable_storage(ir_variable *var) 459{ 460 return (dst_reg *)hash_table_find(this->variable_ht, var); 461} 462 463void 464vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) 465{ 466 ir_expression *expr = ir->as_expression(); 467 468 if (expr) { 469 src_reg op[2]; 470 vec4_instruction *inst; 471 472 assert(expr->get_num_operands() <= 2); 473 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 474 assert(expr->operands[i]->type->is_scalar()); 475 476 expr->operands[i]->accept(this); 477 op[i] = this->result; 478 } 479 480 switch (expr->operation) { 481 case ir_unop_logic_not: 482 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); 483 inst->conditional_mod = BRW_CONDITIONAL_Z; 484 break; 485 486 case ir_binop_logic_xor: 487 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); 488 inst->conditional_mod = BRW_CONDITIONAL_NZ; 489 break; 490 491 case ir_binop_logic_or: 492 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); 493 inst->conditional_mod = BRW_CONDITIONAL_NZ; 494 break; 495 496 case ir_binop_logic_and: 497 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); 498 inst->conditional_mod = BRW_CONDITIONAL_NZ; 499 break; 500 501 case ir_unop_f2b: 502 if (intel->gen >= 6) { 503 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); 504 } else { 505 inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); 506 } 507 inst->conditional_mod = BRW_CONDITIONAL_NZ; 508 break; 509 510 case ir_unop_i2b: 511 if (intel->gen >= 6) { 512 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 513 } else { 514 inst = 
emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); 515 } 516 inst->conditional_mod = BRW_CONDITIONAL_NZ; 517 break; 518 519 case ir_binop_greater: 520 case ir_binop_gequal: 521 case ir_binop_less: 522 case ir_binop_lequal: 523 case ir_binop_equal: 524 case ir_binop_all_equal: 525 case ir_binop_nequal: 526 case ir_binop_any_nequal: 527 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 528 inst->conditional_mod = 529 brw_conditional_for_comparison(expr->operation); 530 break; 531 532 default: 533 assert(!"not reached"); 534 break; 535 } 536 return; 537 } 538 539 ir->accept(this); 540 541 if (intel->gen >= 6) { 542 vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), 543 this->result, src_reg(1)); 544 inst->conditional_mod = BRW_CONDITIONAL_NZ; 545 } else { 546 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); 547 inst->conditional_mod = BRW_CONDITIONAL_NZ; 548 } 549} 550 551/** 552 * Emit a gen6 IF statement with the comparison folded into the IF 553 * instruction. 
554 */ 555void 556vec4_visitor::emit_if_gen6(ir_if *ir) 557{ 558 ir_expression *expr = ir->condition->as_expression(); 559 560 if (expr) { 561 src_reg op[2]; 562 vec4_instruction *inst; 563 dst_reg temp; 564 565 assert(expr->get_num_operands() <= 2); 566 for (unsigned int i = 0; i < expr->get_num_operands(); i++) { 567 assert(expr->operands[i]->type->is_scalar() || 568 expr->operation == ir_binop_any_nequal || 569 expr->operation == ir_binop_all_equal); 570 571 expr->operands[i]->accept(this); 572 op[i] = this->result; 573 } 574 575 switch (expr->operation) { 576 case ir_unop_logic_not: 577 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 578 inst->conditional_mod = BRW_CONDITIONAL_Z; 579 return; 580 581 case ir_binop_logic_xor: 582 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 583 inst->conditional_mod = BRW_CONDITIONAL_NZ; 584 return; 585 586 case ir_binop_logic_or: 587 temp = dst_reg(this, glsl_type::bool_type); 588 emit(BRW_OPCODE_OR, temp, op[0], op[1]); 589 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 590 inst->conditional_mod = BRW_CONDITIONAL_NZ; 591 return; 592 593 case ir_binop_logic_and: 594 temp = dst_reg(this, glsl_type::bool_type); 595 emit(BRW_OPCODE_AND, temp, op[0], op[1]); 596 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); 597 inst->conditional_mod = BRW_CONDITIONAL_NZ; 598 return; 599 600 case ir_unop_f2b: 601 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); 602 inst->conditional_mod = BRW_CONDITIONAL_NZ; 603 return; 604 605 case ir_unop_i2b: 606 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 607 inst->conditional_mod = BRW_CONDITIONAL_NZ; 608 return; 609 610 case ir_binop_greater: 611 case ir_binop_gequal: 612 case ir_binop_less: 613 case ir_binop_lequal: 614 case ir_binop_equal: 615 case ir_binop_nequal: 616 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); 617 inst->conditional_mod = 618 brw_conditional_for_comparison(expr->operation); 619 
return; 620 621 case ir_binop_all_equal: 622 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); 623 inst->conditional_mod = BRW_CONDITIONAL_Z; 624 625 inst = emit(BRW_OPCODE_IF); 626 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 627 return; 628 629 case ir_binop_any_nequal: 630 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); 631 inst->conditional_mod = BRW_CONDITIONAL_NZ; 632 633 inst = emit(BRW_OPCODE_IF); 634 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 635 return; 636 637 default: 638 assert(!"not reached"); 639 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); 640 inst->conditional_mod = BRW_CONDITIONAL_NZ; 641 return; 642 } 643 return; 644 } 645 646 ir->condition->accept(this); 647 648 vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), 649 this->result, src_reg(0)); 650 inst->conditional_mod = BRW_CONDITIONAL_NZ; 651} 652 653void 654vec4_visitor::visit(ir_variable *ir) 655{ 656 dst_reg *reg = NULL; 657 658 if (variable_storage(ir)) 659 return; 660 661 switch (ir->mode) { 662 case ir_var_in: 663 reg = new(mem_ctx) dst_reg(ATTR, ir->location); 664 break; 665 666 case ir_var_out: 667 reg = new(mem_ctx) dst_reg(this, ir->type); 668 669 for (int i = 0; i < type_size(ir->type); i++) { 670 output_reg[ir->location + i] = *reg; 671 output_reg[ir->location + i].reg_offset = i; 672 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F; 673 } 674 break; 675 676 case ir_var_auto: 677 case ir_var_temporary: 678 reg = new(mem_ctx) dst_reg(this, ir->type); 679 break; 680 681 case ir_var_uniform: 682 reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); 683 684 if (!strncmp(ir->name, "gl_", 3)) { 685 setup_builtin_uniform_values(ir); 686 } else { 687 setup_uniform_values(ir->location, ir->type); 688 } 689 break; 690 691 default: 692 assert(!"not reached"); 693 } 694 695 reg->type = brw_type_for_base_type(ir->type); 696 hash_table_insert(this->variable_ht, reg, ir); 697} 698 699void 700vec4_visitor::visit(ir_loop *ir) 701{ 702 dst_reg 
counter; 703 704 /* We don't want debugging output to print the whole body of the 705 * loop as the annotation. 706 */ 707 this->base_ir = NULL; 708 709 if (ir->counter != NULL) { 710 this->base_ir = ir->counter; 711 ir->counter->accept(this); 712 counter = *(variable_storage(ir->counter)); 713 714 if (ir->from != NULL) { 715 this->base_ir = ir->from; 716 ir->from->accept(this); 717 718 emit(BRW_OPCODE_MOV, counter, this->result); 719 } 720 } 721 722 emit(BRW_OPCODE_DO); 723 724 if (ir->to) { 725 this->base_ir = ir->to; 726 ir->to->accept(this); 727 728 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(), 729 src_reg(counter), this->result); 730 inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); 731 732 inst = emit(BRW_OPCODE_BREAK); 733 inst->predicate = BRW_PREDICATE_NORMAL; 734 } 735 736 visit_instructions(&ir->body_instructions); 737 738 739 if (ir->increment) { 740 this->base_ir = ir->increment; 741 ir->increment->accept(this); 742 emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result); 743 } 744 745 emit(BRW_OPCODE_WHILE); 746} 747 748void 749vec4_visitor::visit(ir_loop_jump *ir) 750{ 751 switch (ir->mode) { 752 case ir_loop_jump::jump_break: 753 emit(BRW_OPCODE_BREAK); 754 break; 755 case ir_loop_jump::jump_continue: 756 emit(BRW_OPCODE_CONTINUE); 757 break; 758 } 759} 760 761 762void 763vec4_visitor::visit(ir_function_signature *ir) 764{ 765 assert(0); 766 (void)ir; 767} 768 769void 770vec4_visitor::visit(ir_function *ir) 771{ 772 /* Ignore function bodies other than main() -- we shouldn't see calls to 773 * them since they should all be inlined. 
774 */ 775 if (strcmp(ir->name, "main") == 0) { 776 const ir_function_signature *sig; 777 exec_list empty; 778 779 sig = ir->matching_signature(&empty); 780 781 assert(sig); 782 783 visit_instructions(&sig->body); 784 } 785} 786 787GLboolean 788vec4_visitor::try_emit_sat(ir_expression *ir) 789{ 790 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 791 if (!sat_src) 792 return false; 793 794 sat_src->accept(this); 795 src_reg src = this->result; 796 797 this->result = src_reg(this, ir->type); 798 vec4_instruction *inst; 799 inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); 800 inst->saturate = true; 801 802 return true; 803} 804 805void 806vec4_visitor::emit_bool_comparison(unsigned int op, 807 dst_reg dst, src_reg src0, src_reg src1) 808{ 809 /* original gen4 does destination conversion before comparison. */ 810 if (intel->gen < 5) 811 dst.type = src0.type; 812 813 vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); 814 inst->conditional_mod = brw_conditional_for_comparison(op); 815 816 dst.type = BRW_REGISTER_TYPE_D; 817 emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); 818} 819 820void 821vec4_visitor::visit(ir_expression *ir) 822{ 823 unsigned int operand; 824 src_reg op[Elements(ir->operands)]; 825 src_reg result_src; 826 dst_reg result_dst; 827 vec4_instruction *inst; 828 829 if (try_emit_sat(ir)) 830 return; 831 832 for (operand = 0; operand < ir->get_num_operands(); operand++) { 833 this->result.file = BAD_FILE; 834 ir->operands[operand]->accept(this); 835 if (this->result.file == BAD_FILE) { 836 printf("Failed to get tree for expression operand:\n"); 837 ir->operands[operand]->print(); 838 exit(1); 839 } 840 op[operand] = this->result; 841 842 /* Matrix expression operands should have been broken down to vector 843 * operations already. 
844 */ 845 assert(!ir->operands[operand]->type->is_matrix()); 846 } 847 848 int vector_elements = ir->operands[0]->type->vector_elements; 849 if (ir->operands[1]) { 850 vector_elements = MAX2(vector_elements, 851 ir->operands[1]->type->vector_elements); 852 } 853 854 this->result.file = BAD_FILE; 855 856 /* Storage for our result. Ideally for an assignment we'd be using 857 * the actual storage for the result here, instead. 858 */ 859 result_src = src_reg(this, ir->type); 860 /* convenience for the emit functions below. */ 861 result_dst = dst_reg(result_src); 862 /* If nothing special happens, this is the result. */ 863 this->result = result_src; 864 /* Limit writes to the channels that will be used by result_src later. 865 * This does limit this temp's use as a temporary for multi-instruction 866 * sequences. 867 */ 868 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 869 870 switch (ir->operation) { 871 case ir_unop_logic_not: 872 /* Note that BRW_OPCODE_NOT is not appropriate here, since it is 873 * ones complement of the whole register, not just bit 0. 
874 */ 875 emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); 876 break; 877 case ir_unop_neg: 878 op[0].negate = !op[0].negate; 879 this->result = op[0]; 880 break; 881 case ir_unop_abs: 882 op[0].abs = true; 883 op[0].negate = false; 884 this->result = op[0]; 885 break; 886 887 case ir_unop_sign: 888 emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); 889 890 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 891 inst->conditional_mod = BRW_CONDITIONAL_G; 892 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); 893 inst->predicate = BRW_PREDICATE_NORMAL; 894 895 inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); 896 inst->conditional_mod = BRW_CONDITIONAL_L; 897 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); 898 inst->predicate = BRW_PREDICATE_NORMAL; 899 900 break; 901 902 case ir_unop_rcp: 903 emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); 904 break; 905 906 case ir_unop_exp2: 907 emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); 908 break; 909 case ir_unop_log2: 910 emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); 911 break; 912 case ir_unop_exp: 913 case ir_unop_log: 914 assert(!"not reached: should be handled by ir_explog_to_explog2"); 915 break; 916 case ir_unop_sin: 917 case ir_unop_sin_reduced: 918 emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); 919 break; 920 case ir_unop_cos: 921 case ir_unop_cos_reduced: 922 emit_math(SHADER_OPCODE_COS, result_dst, op[0]); 923 break; 924 925 case ir_unop_dFdx: 926 case ir_unop_dFdy: 927 assert(!"derivatives not valid in vertex shader"); 928 break; 929 930 case ir_unop_noise: 931 assert(!"not reached: should be handled by lower_noise"); 932 break; 933 934 case ir_binop_add: 935 emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); 936 break; 937 case ir_binop_sub: 938 assert(!"not reached: should be handled by ir_sub_to_add_neg"); 939 break; 940 941 case ir_binop_mul: 942 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); 943 break; 944 case ir_binop_div: 945 assert(!"not reached: 
should be handled by ir_div_to_mul_rcp"); 946 case ir_binop_mod: 947 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 948 break; 949 950 case ir_binop_less: 951 case ir_binop_greater: 952 case ir_binop_lequal: 953 case ir_binop_gequal: 954 case ir_binop_equal: 955 case ir_binop_nequal: { 956 dst_reg temp = result_dst; 957 /* original gen4 does implicit conversion before comparison. */ 958 if (intel->gen < 5) 959 temp.type = op[0].type; 960 961 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 962 inst->conditional_mod = brw_conditional_for_comparison(ir->operation); 963 emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); 964 break; 965 } 966 967 case ir_binop_all_equal: 968 /* "==" operator producing a scalar boolean. */ 969 if (ir->operands[0]->type->is_vector() || 970 ir->operands[1]->type->is_vector()) { 971 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 972 inst->conditional_mod = BRW_CONDITIONAL_Z; 973 974 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 975 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 976 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 977 } else { 978 dst_reg temp = result_dst; 979 /* original gen4 does implicit conversion before comparison. */ 980 if (intel->gen < 5) 981 temp.type = op[0].type; 982 983 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 984 inst->conditional_mod = BRW_CONDITIONAL_NZ; 985 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 986 } 987 break; 988 case ir_binop_any_nequal: 989 /* "!=" operator producing a scalar boolean. 
*/ 990 if (ir->operands[0]->type->is_vector() || 991 ir->operands[1]->type->is_vector()) { 992 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); 993 inst->conditional_mod = BRW_CONDITIONAL_NZ; 994 995 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 996 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 997 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 998 } else { 999 dst_reg temp = result_dst; 1000 /* original gen4 does implicit conversion before comparison. */ 1001 if (intel->gen < 5) 1002 temp.type = op[0].type; 1003 1004 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); 1005 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1006 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); 1007 } 1008 break; 1009 1010 case ir_unop_any: 1011 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); 1012 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1013 1014 emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); 1015 1016 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); 1017 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 1018 break; 1019 1020 case ir_binop_logic_xor: 1021 emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1022 break; 1023 1024 case ir_binop_logic_or: 1025 emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1026 break; 1027 1028 case ir_binop_logic_and: 1029 emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1030 break; 1031 1032 case ir_binop_dot: 1033 assert(ir->operands[0]->type->is_vector()); 1034 assert(ir->operands[0]->type == ir->operands[1]->type); 1035 emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); 1036 break; 1037 1038 case ir_unop_sqrt: 1039 emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); 1040 break; 1041 case ir_unop_rsq: 1042 emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); 1043 break; 1044 case ir_unop_i2f: 1045 case ir_unop_i2u: 1046 case ir_unop_u2i: 1047 case ir_unop_u2f: 1048 case ir_unop_b2f: 1049 case ir_unop_b2i: 1050 case ir_unop_f2i: 1051 emit(BRW_OPCODE_MOV, result_dst, op[0]); 1052 break; 1053 case 
ir_unop_f2b: 1054 case ir_unop_i2b: { 1055 dst_reg temp = result_dst; 1056 /* original gen4 does implicit conversion before comparison. */ 1057 if (intel->gen < 5) 1058 temp.type = op[0].type; 1059 1060 inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); 1061 inst->conditional_mod = BRW_CONDITIONAL_NZ; 1062 inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); 1063 break; 1064 } 1065 1066 case ir_unop_trunc: 1067 emit(BRW_OPCODE_RNDZ, result_dst, op[0]); 1068 break; 1069 case ir_unop_ceil: 1070 op[0].negate = !op[0].negate; 1071 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1072 this->result.negate = true; 1073 break; 1074 case ir_unop_floor: 1075 inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); 1076 break; 1077 case ir_unop_fract: 1078 inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); 1079 break; 1080 case ir_unop_round_even: 1081 emit(BRW_OPCODE_RNDE, result_dst, op[0]); 1082 break; 1083 1084 case ir_binop_min: 1085 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1086 inst->conditional_mod = BRW_CONDITIONAL_L; 1087 1088 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1089 inst->predicate = BRW_PREDICATE_NORMAL; 1090 break; 1091 case ir_binop_max: 1092 inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); 1093 inst->conditional_mod = BRW_CONDITIONAL_G; 1094 1095 inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); 1096 inst->predicate = BRW_PREDICATE_NORMAL; 1097 break; 1098 1099 case ir_binop_pow: 1100 emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); 1101 break; 1102 1103 case ir_unop_bit_not: 1104 inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); 1105 break; 1106 case ir_binop_bit_and: 1107 inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); 1108 break; 1109 case ir_binop_bit_xor: 1110 inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); 1111 break; 1112 case ir_binop_bit_or: 1113 inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); 1114 break; 1115 1116 case ir_binop_lshift: 1117 case ir_binop_rshift: 1118 
assert(!"GLSL 1.30 features unsupported"); 1119 break; 1120 1121 case ir_quadop_vector: 1122 assert(!"not reached: should be handled by lower_quadop_vector"); 1123 break; 1124 } 1125} 1126 1127 1128void 1129vec4_visitor::visit(ir_swizzle *ir) 1130{ 1131 src_reg src; 1132 int i = 0; 1133 int swizzle[4]; 1134 1135 /* Note that this is only swizzles in expressions, not those on the left 1136 * hand side of an assignment, which do write masking. See ir_assignment 1137 * for that. 1138 */ 1139 1140 ir->val->accept(this); 1141 src = this->result; 1142 assert(src.file != BAD_FILE); 1143 1144 for (i = 0; i < ir->type->vector_elements; i++) { 1145 switch (i) { 1146 case 0: 1147 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); 1148 break; 1149 case 1: 1150 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); 1151 break; 1152 case 2: 1153 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); 1154 break; 1155 case 3: 1156 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); 1157 break; 1158 } 1159 } 1160 for (; i < 4; i++) { 1161 /* Replicate the last channel out. 
       */
      swizzle[i] = swizzle[ir->type->vector_elements - 1];
   }

   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}

/**
 * Dereference a variable: look up the register allocated for it and make
 * that register the current result.
 */
void
vec4_visitor::visit(ir_dereference_variable *ir)
{
   const struct glsl_type *type = ir->type;
   dst_reg *reg = variable_storage(ir->var);

   if (!reg) {
      fail("Failed to find variable storage for %s\n", ir->var->name);
      this->result = src_reg(brw_null_reg());
      return;
   }

   this->result = src_reg(*reg);

   /* For types smaller than a vec4, replicate the last channel out through
    * the swizzle (swizzle_for_size) so all four channels read defined data.
    */
   if (type->is_scalar() || type->is_vector() || type->is_matrix())
      this->result.swizzle = swizzle_for_size(type->vector_elements);
}

/**
 * Dereference an array element.  A constant index simply offsets the
 * register number; a variable index becomes a reladdr (relative
 * addressing) source on the register.
 */
void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         /* Scale the index by the element size in vec4 slots. */
         index_reg = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_MUL, dst_reg(index_reg),
              this->result, src_reg(element_size));
      }

      if (src.reladdr) {
         /* Nested variable indexing: accumulate into the existing reladdr. */
         src_reg temp = src_reg(this, glsl_type::int_type);

         emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);

         index_reg = temp;
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out.
    */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}

/**
 * Dereference a structure field: find the field by name and add its
 * accumulated register offset to the result of visiting the record.
 */
void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   /* Sum the sizes of the fields preceding the one we're accessing. */
   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;
   this->result.type = brw_type_for_base_type(ir->type);

   this->result.reg_offset += offset;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series conditional moves
    * before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
1287 */ 1288 ir->accept(v); 1289 return dst_reg(v->result); 1290} 1291 1292void 1293vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, 1294 const struct glsl_type *type, bool predicated) 1295{ 1296 if (type->base_type == GLSL_TYPE_STRUCT) { 1297 for (unsigned int i = 0; i < type->length; i++) { 1298 emit_block_move(dst, src, type->fields.structure[i].type, predicated); 1299 } 1300 return; 1301 } 1302 1303 if (type->is_array()) { 1304 for (unsigned int i = 0; i < type->length; i++) { 1305 emit_block_move(dst, src, type->fields.array, predicated); 1306 } 1307 return; 1308 } 1309 1310 if (type->is_matrix()) { 1311 const struct glsl_type *vec_type; 1312 1313 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 1314 type->vector_elements, 1); 1315 1316 for (int i = 0; i < type->matrix_columns; i++) { 1317 emit_block_move(dst, src, vec_type, predicated); 1318 } 1319 return; 1320 } 1321 1322 assert(type->is_scalar() || type->is_vector()); 1323 1324 dst->type = brw_type_for_base_type(type); 1325 src->type = dst->type; 1326 1327 dst->writemask = (1 << type->vector_elements) - 1; 1328 1329 /* Do we need to worry about swizzling a swizzle? */ 1330 assert(src->swizzle = BRW_SWIZZLE_NOOP); 1331 src->swizzle = swizzle_for_size(type->vector_elements); 1332 1333 vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src); 1334 if (predicated) 1335 inst->predicate = BRW_PREDICATE_NORMAL; 1336 1337 dst->reg_offset++; 1338 src->reg_offset++; 1339} 1340 1341void 1342vec4_visitor::visit(ir_assignment *ir) 1343{ 1344 dst_reg dst = get_assignment_lhs(ir->lhs, this); 1345 1346 if (!ir->lhs->type->is_scalar() && 1347 !ir->lhs->type->is_vector()) { 1348 ir->rhs->accept(this); 1349 src_reg src = this->result; 1350 1351 if (ir->condition) { 1352 emit_bool_to_cond_code(ir->condition); 1353 } 1354 1355 emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL); 1356 return; 1357 } 1358 1359 /* Now we're down to just a scalar/vector with writemasks. 
*/ 1360 int i; 1361 1362 ir->rhs->accept(this); 1363 src_reg src = this->result; 1364 1365 int swizzles[4]; 1366 int first_enabled_chan = 0; 1367 int src_chan = 0; 1368 1369 assert(ir->lhs->type->is_vector() || 1370 ir->lhs->type->is_scalar()); 1371 dst.writemask = ir->write_mask; 1372 1373 for (int i = 0; i < 4; i++) { 1374 if (dst.writemask & (1 << i)) { 1375 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); 1376 break; 1377 } 1378 } 1379 1380 /* Swizzle a small RHS vector into the channels being written. 1381 * 1382 * glsl ir treats write_mask as dictating how many channels are 1383 * present on the RHS while in our instructions we need to make 1384 * those channels appear in the slots of the vec4 they're written to. 1385 */ 1386 for (int i = 0; i < 4; i++) { 1387 if (dst.writemask & (1 << i)) 1388 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); 1389 else 1390 swizzles[i] = first_enabled_chan; 1391 } 1392 src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], 1393 swizzles[2], swizzles[3]); 1394 1395 if (ir->condition) { 1396 emit_bool_to_cond_code(ir->condition); 1397 } 1398 1399 for (i = 0; i < type_size(ir->lhs->type); i++) { 1400 vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); 1401 1402 if (ir->condition) 1403 inst->predicate = BRW_PREDICATE_NORMAL; 1404 1405 dst.reg_offset++; 1406 src.reg_offset++; 1407 } 1408} 1409 1410void 1411vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) 1412{ 1413 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 1414 foreach_list(node, &ir->components) { 1415 ir_constant *field_value = (ir_constant *)node; 1416 1417 emit_constant_values(dst, field_value); 1418 } 1419 return; 1420 } 1421 1422 if (ir->type->is_array()) { 1423 for (unsigned int i = 0; i < ir->type->length; i++) { 1424 emit_constant_values(dst, ir->array_elements[i]); 1425 } 1426 return; 1427 } 1428 1429 if (ir->type->is_matrix()) { 1430 for (int i = 0; i < ir->type->matrix_columns; i++) { 1431 for (int j = 0; j < ir->type->vector_elements; j++) 
         {
            dst->writemask = 1 << j;
            /* GLSL matrices are always float. */
            dst->type = BRW_REGISTER_TYPE_F;

            emit(BRW_OPCODE_MOV, *dst,
                 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
         }
         dst->reg_offset++;
      }
      return;
   }

   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst->writemask = 1 << i;
      dst->type = brw_type_for_base_type(ir->type);

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
         break;
      case GLSL_TYPE_INT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
         break;
      case GLSL_TYPE_UINT:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
         break;
      case GLSL_TYPE_BOOL:
         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
         break;
      }
   }
   dst->reg_offset++;
}

/**
 * Allocate storage for a constant and fill it with immediate MOVs.
 */
void
vec4_visitor::visit(ir_constant *ir)
{
   dst_reg dst = dst_reg(this, ir->type);
   this->result = src_reg(dst);

   emit_constant_values(&dst, ir);
}

/* Calls are inlined before reaching the backend. */
void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}

/* FINISHME: texturing in the VS is not supported yet. */
void
vec4_visitor::visit(ir_texture *ir)
{
   assert(!"not reached");
}

/* Returns are lowered before reaching the backend. */
void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}

/* discard is a fragment-shader-only instruction. */
void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
1506 */ 1507 this->base_ir = ir->condition; 1508 1509 if (intel->gen == 6) { 1510 emit_if_gen6(ir); 1511 } else { 1512 emit_bool_to_cond_code(ir->condition); 1513 vec4_instruction *inst = emit(BRW_OPCODE_IF); 1514 inst->predicate = BRW_PREDICATE_NORMAL; 1515 } 1516 1517 visit_instructions(&ir->then_instructions); 1518 1519 if (!ir->else_instructions.is_empty()) { 1520 this->base_ir = ir->condition; 1521 emit(BRW_OPCODE_ELSE); 1522 1523 visit_instructions(&ir->else_instructions); 1524 } 1525 1526 this->base_ir = ir->condition; 1527 emit(BRW_OPCODE_ENDIF); 1528} 1529 1530int 1531vec4_visitor::emit_vue_header_gen4(int header_mrf) 1532{ 1533 /* Get the position */ 1534 src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); 1535 1536 /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ 1537 dst_reg ndc = dst_reg(this, glsl_type::vec4_type); 1538 1539 current_annotation = "NDC"; 1540 dst_reg ndc_w = ndc; 1541 ndc_w.writemask = WRITEMASK_W; 1542 src_reg pos_w = pos; 1543 pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); 1544 emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); 1545 1546 dst_reg ndc_xyz = ndc; 1547 ndc_xyz.writemask = WRITEMASK_XYZ; 1548 1549 emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); 1550 1551 if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || 1552 c->key.nr_userclip || brw->has_negative_rhw_bug) { 1553 dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); 1554 GLuint i; 1555 1556 emit(BRW_OPCODE_MOV, header1, 0u); 1557 1558 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1559 assert(!"finishme: psiz"); 1560 src_reg psiz; 1561 1562 header1.writemask = WRITEMASK_W; 1563 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); 1564 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); 1565 } 1566 1567 for (i = 0; i < c->key.nr_userclip; i++) { 1568 vec4_instruction *inst; 1569 1570 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), 1571 pos, src_reg(c->userplane[i])); 1572 
inst->conditional_mod = BRW_CONDITIONAL_L; 1573 1574 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); 1575 inst->predicate = BRW_PREDICATE_NORMAL; 1576 } 1577 1578 /* i965 clipping workaround: 1579 * 1) Test for -ve rhw 1580 * 2) If set, 1581 * set ndc = (0,0,0,0) 1582 * set ucp[6] = 1 1583 * 1584 * Later, clipping will detect ucp[6] and ensure the primitive is 1585 * clipped against all fixed planes. 1586 */ 1587 if (brw->has_negative_rhw_bug) { 1588#if 0 1589 /* FINISHME */ 1590 brw_CMP(p, 1591 vec8(brw_null_reg()), 1592 BRW_CONDITIONAL_L, 1593 brw_swizzle1(ndc, 3), 1594 brw_imm_f(0)); 1595 1596 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); 1597 brw_MOV(p, ndc, brw_imm_f(0)); 1598 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1599#endif 1600 } 1601 1602 header1.writemask = WRITEMASK_XYZW; 1603 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); 1604 } else { 1605 emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), 1606 BRW_REGISTER_TYPE_UD), 0u); 1607 } 1608 1609 if (intel->gen == 5) { 1610 /* There are 20 DWs (D0-D19) in VUE header on Ironlake: 1611 * dword 0-3 (m1) of the header is indices, point width, clip flags. 1612 * dword 4-7 (m2) is the ndc position (set above) 1613 * dword 8-11 (m3) of the vertex header is the 4D space position 1614 * dword 12-19 (m4,m5) of the vertex header is the user clip distance. 1615 * m6 is a pad so that the vertex element data is aligned 1616 * m7 is the first vertex data we fill. 1617 */ 1618 current_annotation = "NDC"; 1619 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1620 1621 current_annotation = "gl_Position"; 1622 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1623 1624 /* user clip distance. */ 1625 header_mrf += 2; 1626 1627 /* Pad so that vertex element data is aligned. */ 1628 header_mrf++; 1629 } else { 1630 /* There are 8 dwords in VUE header pre-Ironlake: 1631 * dword 0-3 (m1) is indices, point width, clip flags. 
1632 * dword 4-7 (m2) is ndc position (set above) 1633 * 1634 * dword 8-11 (m3) is the first vertex data. 1635 */ 1636 current_annotation = "NDC"; 1637 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); 1638 1639 current_annotation = "gl_Position"; 1640 emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); 1641 } 1642 1643 return header_mrf; 1644} 1645 1646int 1647vec4_visitor::emit_vue_header_gen6(int header_mrf) 1648{ 1649 struct brw_reg reg; 1650 1651 /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: 1652 * dword 0-3 (m2) of the header is indices, point width, clip flags. 1653 * dword 4-7 (m3) is the 4D space position 1654 * dword 8-15 (m4,m5) of the vertex header is the user clip distance if 1655 * enabled. 1656 * 1657 * m4 or 6 is the first vertex element data we fill. 1658 */ 1659 1660 current_annotation = "indices, point width, clip flags"; 1661 reg = brw_message_reg(header_mrf++); 1662 emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); 1663 if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { 1664 emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), 1665 src_reg(output_reg[VERT_RESULT_PSIZ])); 1666 } 1667 1668 current_annotation = "gl_Position"; 1669 emit(BRW_OPCODE_MOV, 1670 brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); 1671 1672 current_annotation = "user clip distances"; 1673 if (c->key.nr_userclip) { 1674 for (int i = 0; i < c->key.nr_userclip; i++) { 1675 struct brw_reg m; 1676 if (i < 4) 1677 m = brw_message_reg(header_mrf); 1678 else 1679 m = brw_message_reg(header_mrf + 1); 1680 1681 emit(BRW_OPCODE_DP4, 1682 dst_reg(brw_writemask(m, 1 << (i & 3))), 1683 src_reg(c->userplane[i])); 1684 } 1685 header_mrf += 2; 1686 } 1687 1688 current_annotation = NULL; 1689 1690 return header_mrf; 1691} 1692 1693static int 1694align_interleaved_urb_mlen(struct brw_context *brw, int mlen) 1695{ 1696 struct intel_context *intel = &brw->intel; 1697 1698 if (intel->gen >= 6) { 
      /* URB data written (does not include the message header reg) must
       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
       * section 5.4.3.2.2: URB_INTERLEAVED.
       *
       * URB entries are allocated on a multiple of 1024 bits, so an
       * extra 128 bits written here to make the end align to 256 is
       * no problem.
       */
      /* mlen counts the header register, so the payload (mlen - 1) is
       * even exactly when mlen is odd.
       */
      if ((mlen % 2) != 1)
         mlen++;
   }

   return mlen;
}

/**
 * Generates the VUE payload plus the 1 or 2 URB write instructions to
 * complete the VS thread.
 *
 * The VUE layout is documented in Volume 2a.
 */
void
vec4_visitor::emit_urb_writes()
{
   /* MRF 0 is reserved for the debugger, so start with message header
    * in MRF 1.
    */
   int base_mrf = 1;
   int mrf = base_mrf;
   int urb_entry_size;
   /* Bitfield of outputs not yet copied into MRFs; drives the optional
    * second URB write below.
    */
   uint64_t outputs_remaining = c->prog_data.outputs_written;
   /* In the process of generating our URB write message contents, we
    * may need to unspill a register or load from an array.  Those
    * reads would use MRFs 14-15.
    */
   int max_usable_mrf = 13;

   /* FINISHME: edgeflag */

   /* First mrf is the g0-based message header containing URB handles and such,
    * which is implied in VS_OPCODE_URB_WRITE.
    */
   mrf++;

   if (intel->gen >= 6) {
      mrf = emit_vue_header_gen6(mrf);
   } else {
      mrf = emit_vue_header_gen4(mrf);
   }

   /* Set up the VUE data for the first URB write */
   int attr;
   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
         continue;

      outputs_remaining &= ~BITFIELD64_BIT(attr);

      /* This is set up in the VUE header. */
      if (attr == VERT_RESULT_HPOS)
         continue;

      /* This is loaded into the VUE header, and thus doesn't occupy
       * an attribute slot.
       */
      if (attr == VERT_RESULT_PSIZ)
         continue;

      vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
                                    src_reg(output_reg[attr]));

      /* Vertex colors are clamped to [0,1] when ARB_color_buffer_float
       * clamping is in effect.
       */
      if ((attr == VERT_RESULT_COL0 ||
           attr == VERT_RESULT_COL1 ||
           attr == VERT_RESULT_BFC0 ||
           attr == VERT_RESULT_BFC1) &&
          c->key.clamp_vertex_color) {
         inst->saturate = true;
      }

      /* If this was MRF 15, we can't fit anything more into this URB
       * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
       * even-numbered amount of URB write data, which will meet
       * gen6's requirements for length alignment.
       */
      if (mrf > max_usable_mrf) {
         attr++;
         break;
      }
   }

   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
   inst->base_mrf = base_mrf;
   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
   /* Only the last URB write of the thread carries EOT. */
   inst->eot = !outputs_remaining;

   urb_entry_size = mrf - base_mrf;

   /* Optional second URB write */
   if (outputs_remaining) {
      mrf = base_mrf + 1;

      for (; attr < VERT_RESULT_MAX; attr++) {
         if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
            continue;

         assert(mrf < max_usable_mrf);

         emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
      }

      inst = emit(VS_OPCODE_URB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
      inst->eot = true;
      /* URB destination offset.  In the previous write, we got MRFs
       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
       * URB row increments, and each of our MRFs is half of one of
       * those, since we're doing interleaved writes.
       */
      inst->offset = (max_usable_mrf - base_mrf) / 2;

      urb_entry_size += mrf - base_mrf;
   }

   /* urb_entry_size is stored in units of 8 registers on gen6, 4 before. */
   if (intel->gen == 6)
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
   else
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
}

/**
 * Compute the scratch-space offset source for a spill/unspill of
 * @reg_offset, folding in a reladdr if present.  Generated address math
 * is inserted before @inst.
 */
src_reg
vec4_visitor::get_scratch_offset(vec4_instruction *inst,
                                 src_reg *reladdr, int reg_offset)
{
   /* Because we store the values to scratch interleaved like our
    * vertex data, we need to scale the vec4 index by 2.
    */
   int message_header_scale = 2;

   /* Pre-gen6, the message header uses byte offsets instead of vec4
    * (16-byte) offset units.
    */
   if (intel->gen < 6)
      message_header_scale *= 16;

   if (reladdr) {
      src_reg index = src_reg(this, glsl_type::int_type);

      vec4_instruction *add = emit(BRW_OPCODE_ADD,
                                   dst_reg(index),
                                   *reladdr,
                                   src_reg(reg_offset));
      /* Move our new instruction from the tail to its correct place. */
      add->remove();
      inst->insert_before(add);

      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
                                   index, src_reg(message_header_scale));
      mul->remove();
      inst->insert_before(mul);

      return index;
   } else {
      return src_reg(reg_offset * message_header_scale);
   }
}

/**
 * Emits an instruction before @inst to load the value named by @orig_src
 * from scratch space at @base_offset to @temp.
 */
void
vec4_visitor::emit_scratch_read(vec4_instruction *inst,
                                dst_reg temp, src_reg orig_src,
                                int base_offset)
{
   int reg_offset = base_offset + orig_src.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);

   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
                                              temp, index);

   /* Scratch reads use MRFs 14-15 (see emit_urb_writes' max_usable_mrf). */
   scratch_read_inst->base_mrf = 14;
   scratch_read_inst->mlen = 1;
   /* Move our instruction from the tail to its correct place. */
   scratch_read_inst->remove();
   inst->insert_before(scratch_read_inst);
}

/**
 * Emits an instruction after @inst to store the value to be written
 * to @orig_dst to scratch space at @base_offset, from @temp.
 */
void
vec4_visitor::emit_scratch_write(vec4_instruction *inst,
                                 src_reg temp, dst_reg orig_dst,
                                 int base_offset)
{
   int reg_offset = base_offset + orig_dst.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);

   /* Carry the original writemask through so only the enabled channels
    * land in scratch.
    */
   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
                                       orig_dst.writemask));
   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
                                               dst, temp, index);
   scratch_write_inst->base_mrf = 13;
   scratch_write_inst->mlen = 2;
   /* Preserve the predicate so conditional writes stay conditional. */
   scratch_write_inst->predicate = inst->predicate;
   /* Move our instruction from the tail to its correct place. */
   scratch_write_inst->remove();
   inst->insert_after(scratch_write_inst);
}

/**
 * We can't generally support array access in GRF space, because a
 * single instruction's destination can only span 2 contiguous
 * registers.  So, we send all GRF arrays that get variable index
 * access to scratch space.
 */
void
vec4_visitor::move_grf_array_access_to_scratch()
{
   /* Per-virtual-GRF scratch byte offset, or -1 if that GRF never sees a
    * reladdr access.  (Variable-length array: a GCC/Clang extension in
    * C++, used elsewhere in this driver as well.)
    */
   int scratch_loc[this->virtual_grf_count];

   for (int i = 0; i < this->virtual_grf_count; i++) {
      scratch_loc[i] = -1;
   }

   /* First, calculate the set of virtual GRFs that need to be punted
    * to scratch due to having any array access on them, and where in
    * scratch.
    */
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      if (inst->dst.file == GRF && inst->dst.reladdr &&
          scratch_loc[inst->dst.reg] == -1) {
         scratch_loc[inst->dst.reg] = c->last_scratch;
         /* Each vec4 slot occupies 8 * 4 bytes of interleaved scratch. */
         c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
      }

      for (int i = 0 ; i < 3; i++) {
         src_reg *src = &inst->src[i];

         if (src->file == GRF && src->reladdr &&
             scratch_loc[src->reg] == -1) {
            scratch_loc[src->reg] = c->last_scratch;
            c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
         }
      }
   }

   /* Now, for anything that will be accessed through scratch, rewrite
    * it to load/store.  Note that this is a _safe list walk, because
    * we may generate a new scratch_write instruction after the one
    * we're processing.
    */
   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      /* Set up the annotation tracking for new generated instructions.
*/ 1960 base_ir = inst->ir; 1961 current_annotation = inst->annotation; 1962 1963 if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { 1964 src_reg temp = src_reg(this, glsl_type::vec4_type); 1965 1966 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]); 1967 1968 inst->dst.file = temp.file; 1969 inst->dst.reg = temp.reg; 1970 inst->dst.reg_offset = temp.reg_offset; 1971 inst->dst.reladdr = NULL; 1972 } 1973 1974 for (int i = 0 ; i < 3; i++) { 1975 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) 1976 continue; 1977 1978 dst_reg temp = dst_reg(this, glsl_type::vec4_type); 1979 1980 emit_scratch_read(inst, temp, inst->src[i], 1981 scratch_loc[inst->src[i].reg]); 1982 1983 inst->src[i].file = temp.file; 1984 inst->src[i].reg = temp.reg; 1985 inst->src[i].reg_offset = temp.reg_offset; 1986 inst->src[i].reladdr = NULL; 1987 } 1988 } 1989} 1990 1991 1992vec4_visitor::vec4_visitor(struct brw_vs_compile *c, 1993 struct gl_shader_program *prog, 1994 struct brw_shader *shader) 1995{ 1996 this->c = c; 1997 this->p = &c->func; 1998 this->brw = p->brw; 1999 this->intel = &brw->intel; 2000 this->ctx = &intel->ctx; 2001 this->prog = prog; 2002 this->shader = shader; 2003 2004 this->mem_ctx = ralloc_context(NULL); 2005 this->failed = false; 2006 2007 this->base_ir = NULL; 2008 this->current_annotation = NULL; 2009 2010 this->c = c; 2011 this->vp = brw->vertex_program; /* FINISHME: change for precompile */ 2012 this->prog_data = &c->prog_data; 2013 2014 this->variable_ht = hash_table_ctor(0, 2015 hash_table_pointer_hash, 2016 hash_table_pointer_compare); 2017 2018 this->virtual_grf_sizes = NULL; 2019 this->virtual_grf_count = 0; 2020 this->virtual_grf_array_size = 0; 2021 2022 this->uniforms = 0; 2023 2024 this->variable_ht = hash_table_ctor(0, 2025 hash_table_pointer_hash, 2026 hash_table_pointer_compare); 2027} 2028 2029vec4_visitor::~vec4_visitor() 2030{ 2031 hash_table_dtor(this->variable_ht); 2032} 2033 2034 2035void 
2036vec4_visitor::fail(const char *format, ...) 2037{ 2038 va_list va; 2039 char *msg; 2040 2041 if (failed) 2042 return; 2043 2044 failed = true; 2045 2046 va_start(va, format); 2047 msg = ralloc_vasprintf(mem_ctx, format, va); 2048 va_end(va); 2049 msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); 2050 2051 this->fail_msg = msg; 2052 2053 if (INTEL_DEBUG & DEBUG_VS) { 2054 fprintf(stderr, "%s", msg); 2055 } 2056} 2057 2058} /* namespace brw */ 2059