/* brw_vec4_emit.cpp — revision 584ff407482fd3baf5ce081dbbf9653eb76c40f1 */
1/* Copyright © 2011 Intel Corporation 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a 4 * copy of this software and associated documentation files (the "Software"), 5 * to deal in the Software without restriction, including without limitation 6 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 * and/or sell copies of the Software, and to permit persons to whom the 8 * Software is furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice (including the next 11 * paragraph) shall be included in all copies or substantial portions of the 12 * Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 * IN THE SOFTWARE. 
21 */ 22 23#include "brw_vec4.h" 24#include "../glsl/ir_print_visitor.h" 25 26extern "C" { 27#include "brw_eu.h" 28}; 29 30using namespace brw; 31 32namespace brw { 33 34int 35vec4_visitor::setup_attributes(int payload_reg) 36{ 37 int nr_attributes; 38 int attribute_map[VERT_ATTRIB_MAX]; 39 40 nr_attributes = 0; 41 for (int i = 0; i < VERT_ATTRIB_MAX; i++) { 42 if (prog_data->inputs_read & BITFIELD64_BIT(i)) { 43 attribute_map[i] = payload_reg + nr_attributes; 44 nr_attributes++; 45 } 46 } 47 48 foreach_iter(exec_list_iterator, iter, this->instructions) { 49 vec4_instruction *inst = (vec4_instruction *)iter.get(); 50 51 for (int i = 0; i < 3; i++) { 52 if (inst->src[i].file != ATTR) 53 continue; 54 55 inst->src[i].file = HW_REG; 56 inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0); 57 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; 58 } 59 } 60 61 /* The BSpec says we always have to read at least one thing from 62 * the VF, and it appears that the hardware wedges otherwise. 63 */ 64 if (nr_attributes == 0) 65 nr_attributes = 1; 66 67 prog_data->urb_read_length = (nr_attributes + 1) / 2; 68 69 return payload_reg + nr_attributes; 70} 71 72int 73vec4_visitor::setup_uniforms(int reg) 74{ 75 /* User clip planes from curbe: 76 */ 77 if (c->key.nr_userclip) { 78 if (intel->gen >= 6) { 79 for (int i = 0; i < c->key.nr_userclip; i++) { 80 c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, 81 (i % 2) * 4), 0, 4, 1); 82 } 83 reg += ALIGN(c->key.nr_userclip, 2) / 2; 84 } else { 85 for (int i = 0; i < c->key.nr_userclip; i++) { 86 c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, 87 (i % 2) * 4), 0, 4, 1); 88 } 89 reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; 90 } 91 } 92 93 /* The pre-gen6 VS requires that some push constants get loaded no 94 * matter what, or the GPU would hang. 
95 */ 96 if (intel->gen < 6 && this->uniforms == 0) { 97 this->uniform_size[this->uniforms] = 1; 98 99 for (unsigned int i = 0; i < 4; i++) { 100 unsigned int slot = this->uniforms * 4 + i; 101 102 c->prog_data.param[slot] = NULL; 103 c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO; 104 } 105 106 this->uniforms++; 107 reg++; 108 } else { 109 reg += ALIGN(uniforms, 2) / 2; 110 } 111 112 /* for now, we are not doing any elimination of unused slots, nor 113 * are we packing our uniforms. 114 */ 115 c->prog_data.nr_params = this->uniforms * 4; 116 117 c->prog_data.curb_read_length = reg - 1; 118 c->prog_data.uses_new_param_layout = true; 119 120 return reg; 121} 122 123void 124vec4_visitor::setup_payload(void) 125{ 126 int reg = 0; 127 128 /* The payload always contains important data in g0, which contains 129 * the URB handles that are passed on to the URB write at the end 130 * of the thread. So, we always start push constants at g1. 131 */ 132 reg++; 133 134 reg = setup_uniforms(reg); 135 136 reg = setup_attributes(reg); 137 138 this->first_non_payload_grf = reg; 139} 140 141struct brw_reg 142vec4_instruction::get_dst(void) 143{ 144 struct brw_reg brw_reg; 145 146 switch (dst.file) { 147 case GRF: 148 brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); 149 brw_reg = retype(brw_reg, dst.type); 150 brw_reg.dw1.bits.writemask = dst.writemask; 151 break; 152 153 case HW_REG: 154 brw_reg = dst.fixed_hw_reg; 155 break; 156 157 case BAD_FILE: 158 brw_reg = brw_null_reg(); 159 break; 160 161 default: 162 assert(!"not reached"); 163 brw_reg = brw_null_reg(); 164 break; 165 } 166 return brw_reg; 167} 168 169struct brw_reg 170vec4_instruction::get_src(int i) 171{ 172 struct brw_reg brw_reg; 173 174 switch (src[i].file) { 175 case GRF: 176 brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); 177 brw_reg = retype(brw_reg, src[i].type); 178 brw_reg.dw1.bits.swizzle = src[i].swizzle; 179 if (src[i].abs) 180 brw_reg = brw_abs(brw_reg); 181 if (src[i].negate) 182 
brw_reg = negate(brw_reg); 183 break; 184 185 case IMM: 186 switch (src[i].type) { 187 case BRW_REGISTER_TYPE_F: 188 brw_reg = brw_imm_f(src[i].imm.f); 189 break; 190 case BRW_REGISTER_TYPE_D: 191 brw_reg = brw_imm_d(src[i].imm.i); 192 break; 193 case BRW_REGISTER_TYPE_UD: 194 brw_reg = brw_imm_ud(src[i].imm.u); 195 break; 196 default: 197 assert(!"not reached"); 198 brw_reg = brw_null_reg(); 199 break; 200 } 201 break; 202 203 case UNIFORM: 204 brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2, 205 ((src[i].reg + src[i].reg_offset) % 2) * 4), 206 0, 4, 1); 207 brw_reg = retype(brw_reg, src[i].type); 208 brw_reg.dw1.bits.swizzle = src[i].swizzle; 209 if (src[i].abs) 210 brw_reg = brw_abs(brw_reg); 211 if (src[i].negate) 212 brw_reg = negate(brw_reg); 213 break; 214 215 case HW_REG: 216 brw_reg = src[i].fixed_hw_reg; 217 break; 218 219 case BAD_FILE: 220 /* Probably unused. */ 221 brw_reg = brw_null_reg(); 222 break; 223 case ATTR: 224 default: 225 assert(!"not reached"); 226 brw_reg = brw_null_reg(); 227 break; 228 } 229 230 return brw_reg; 231} 232 233void 234vec4_visitor::generate_math1_gen4(vec4_instruction *inst, 235 struct brw_reg dst, 236 struct brw_reg src) 237{ 238 brw_math(p, 239 dst, 240 brw_math_function(inst->opcode), 241 BRW_MATH_SATURATE_NONE, 242 inst->base_mrf, 243 src, 244 BRW_MATH_DATA_SCALAR, 245 BRW_MATH_PRECISION_FULL); 246} 247 248void 249vec4_visitor::generate_math1_gen6(vec4_instruction *inst, 250 struct brw_reg dst, 251 struct brw_reg src) 252{ 253 brw_math(p, 254 dst, 255 brw_math_function(inst->opcode), 256 BRW_MATH_SATURATE_NONE, 257 inst->base_mrf, 258 src, 259 BRW_MATH_DATA_SCALAR, 260 BRW_MATH_PRECISION_FULL); 261} 262 263void 264vec4_visitor::generate_urb_write(vec4_instruction *inst) 265{ 266 brw_urb_WRITE(p, 267 brw_null_reg(), /* dest */ 268 inst->base_mrf, /* starting mrf reg nr */ 269 brw_vec8_grf(0, 0), /* src */ 270 false, /* allocate */ 271 true, /* used */ 272 inst->mlen, 273 0, /* response len */ 274 
inst->eot, /* eot */ 275 inst->eot, /* writes complete */ 276 inst->offset, /* urb destination offset */ 277 BRW_URB_SWIZZLE_INTERLEAVE); 278} 279 280void 281vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1, 282 struct brw_reg index) 283{ 284 int second_vertex_offset; 285 286 if (intel->gen >= 6) 287 second_vertex_offset = 1; 288 else 289 second_vertex_offset = 16; 290 291 m1 = retype(m1, BRW_REGISTER_TYPE_D); 292 293 /* Set up M1 (message payload). Only the block offsets in M1.0 and 294 * M1.4 are used, and the rest are ignored. 295 */ 296 struct brw_reg m1_0 = suboffset(vec1(m1), 0); 297 struct brw_reg m1_4 = suboffset(vec1(m1), 4); 298 struct brw_reg index_0 = suboffset(vec1(index), 0); 299 struct brw_reg index_4 = suboffset(vec1(index), 4); 300 301 brw_push_insn_state(p); 302 brw_set_mask_control(p, BRW_MASK_DISABLE); 303 brw_set_access_mode(p, BRW_ALIGN_1); 304 305 brw_MOV(p, m1_0, index_0); 306 307 brw_set_predicate_inverse(p, true); 308 if (index.file == BRW_IMMEDIATE_VALUE) { 309 index_4.dw1.ud++; 310 brw_MOV(p, m1_4, index_4); 311 } else { 312 brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset)); 313 } 314 315 brw_pop_insn_state(p); 316} 317 318void 319vec4_visitor::generate_scratch_read(vec4_instruction *inst, 320 struct brw_reg dst, 321 struct brw_reg index) 322{ 323 if (intel->gen >= 6) { 324 brw_push_insn_state(p); 325 brw_set_mask_control(p, BRW_MASK_DISABLE); 326 brw_MOV(p, 327 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), 328 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); 329 brw_pop_insn_state(p); 330 } 331 332 generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), 333 index); 334 335 uint32_t msg_type; 336 337 if (intel->gen >= 6) 338 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 339 else if (intel->gen == 5 || intel->is_g4x) 340 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 341 else 342 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 343 344 /* Each 
of the 8 channel enables is considered for whether each 345 * dword is written. 346 */ 347 struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); 348 brw_set_dest(p, send, dst); 349 brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); 350 brw_set_dp_read_message(p, send, 351 255, /* binding table index: stateless access */ 352 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 353 msg_type, 354 BRW_DATAPORT_READ_TARGET_RENDER_CACHE, 355 2, /* mlen */ 356 1 /* rlen */); 357} 358 359void 360vec4_visitor::generate_scratch_write(vec4_instruction *inst, 361 struct brw_reg dst, 362 struct brw_reg src, 363 struct brw_reg index) 364{ 365 /* If the instruction is predicated, we'll predicate the send, not 366 * the header setup. 367 */ 368 brw_set_predicate_control(p, false); 369 370 if (intel->gen >= 6) { 371 brw_push_insn_state(p); 372 brw_set_mask_control(p, BRW_MASK_DISABLE); 373 brw_MOV(p, 374 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), 375 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); 376 brw_pop_insn_state(p); 377 } 378 379 generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), 380 index); 381 382 brw_MOV(p, 383 retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), 384 retype(src, BRW_REGISTER_TYPE_D)); 385 386 uint32_t msg_type; 387 388 if (intel->gen >= 6) 389 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; 390 else 391 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; 392 393 brw_set_predicate_control(p, inst->predicate); 394 395 /* Each of the 8 channel enables is considered for whether each 396 * dword is written. 
397 */ 398 struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); 399 brw_set_dest(p, send, dst); 400 brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); 401 brw_set_dp_write_message(p, send, 402 255, /* binding table index: stateless access */ 403 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 404 msg_type, 405 3, /* mlen */ 406 true, /* header present */ 407 false, /* pixel scoreboard */ 408 0, /* rlen */ 409 false, /* eot */ 410 false /* commit */); 411} 412 413void 414vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, 415 struct brw_reg dst, 416 struct brw_reg *src) 417{ 418 vec4_instruction *inst = (vec4_instruction *)instruction; 419 420 switch (inst->opcode) { 421 case SHADER_OPCODE_RCP: 422 case SHADER_OPCODE_RSQ: 423 case SHADER_OPCODE_SQRT: 424 case SHADER_OPCODE_EXP2: 425 case SHADER_OPCODE_LOG2: 426 case SHADER_OPCODE_SIN: 427 case SHADER_OPCODE_COS: 428 if (intel->gen >= 6) { 429 generate_math1_gen6(inst, dst, src[0]); 430 } else { 431 generate_math1_gen4(inst, dst, src[0]); 432 } 433 break; 434 435 case SHADER_OPCODE_POW: 436 assert(!"finishme"); 437 break; 438 439 case VS_OPCODE_URB_WRITE: 440 generate_urb_write(inst); 441 break; 442 443 case VS_OPCODE_SCRATCH_READ: 444 generate_scratch_read(inst, dst, src[0]); 445 break; 446 447 case VS_OPCODE_SCRATCH_WRITE: 448 generate_scratch_write(inst, dst, src[0], src[1]); 449 break; 450 451 default: 452 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 453 fail("unsupported opcode in `%s' in VS\n", 454 brw_opcodes[inst->opcode].name); 455 } else { 456 fail("Unsupported opcode %d in VS", inst->opcode); 457 } 458 } 459} 460 461bool 462vec4_visitor::run() 463{ 464 /* Generate VS IR for main(). (the visitor only descends into 465 * functions called "main"). 
466 */ 467 foreach_iter(exec_list_iterator, iter, *shader->ir) { 468 ir_instruction *ir = (ir_instruction *)iter.get(); 469 base_ir = ir; 470 ir->accept(this); 471 } 472 473 emit_urb_writes(); 474 475 /* Before any optimization, push array accesses out to scratch 476 * space where we need them to be. This pass may allocate new 477 * virtual GRFs, so we want to do it early. It also makes sure 478 * that we have reladdr computations available for CSE, since we'll 479 * often do repeated subexpressions for those. 480 */ 481 move_grf_array_access_to_scratch(); 482 483 if (failed) 484 return false; 485 486 setup_payload(); 487 reg_allocate(); 488 489 brw_set_access_mode(p, BRW_ALIGN_16); 490 491 generate_code(); 492 493 return !failed; 494} 495 496void 497vec4_visitor::generate_code() 498{ 499 int last_native_inst = p->nr_insn; 500 const char *last_annotation_string = NULL; 501 ir_instruction *last_annotation_ir = NULL; 502 503 int loop_stack_array_size = 16; 504 int loop_stack_depth = 0; 505 brw_instruction **loop_stack = 506 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); 507 int *if_depth_in_loop = 508 rzalloc_array(this->mem_ctx, int, loop_stack_array_size); 509 510 511 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 512 printf("Native code for vertex shader %d:\n", prog->Name); 513 } 514 515 foreach_list(node, &this->instructions) { 516 vec4_instruction *inst = (vec4_instruction *)node; 517 struct brw_reg src[3], dst; 518 519 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 520 if (last_annotation_ir != inst->ir) { 521 last_annotation_ir = inst->ir; 522 if (last_annotation_ir) { 523 printf(" "); 524 last_annotation_ir->print(); 525 printf("\n"); 526 } 527 } 528 if (last_annotation_string != inst->annotation) { 529 last_annotation_string = inst->annotation; 530 if (last_annotation_string) 531 printf(" %s\n", last_annotation_string); 532 } 533 } 534 535 for (unsigned int i = 0; i < 3; i++) { 536 src[i] = inst->get_src(i); 537 } 538 dst = inst->get_dst(); 539 
540 brw_set_conditionalmod(p, inst->conditional_mod); 541 brw_set_predicate_control(p, inst->predicate); 542 brw_set_predicate_inverse(p, inst->predicate_inverse); 543 brw_set_saturate(p, inst->saturate); 544 545 switch (inst->opcode) { 546 case BRW_OPCODE_MOV: 547 brw_MOV(p, dst, src[0]); 548 break; 549 case BRW_OPCODE_ADD: 550 brw_ADD(p, dst, src[0], src[1]); 551 break; 552 case BRW_OPCODE_MUL: 553 brw_MUL(p, dst, src[0], src[1]); 554 break; 555 556 case BRW_OPCODE_FRC: 557 brw_FRC(p, dst, src[0]); 558 break; 559 case BRW_OPCODE_RNDD: 560 brw_RNDD(p, dst, src[0]); 561 break; 562 case BRW_OPCODE_RNDE: 563 brw_RNDE(p, dst, src[0]); 564 break; 565 case BRW_OPCODE_RNDZ: 566 brw_RNDZ(p, dst, src[0]); 567 break; 568 569 case BRW_OPCODE_AND: 570 brw_AND(p, dst, src[0], src[1]); 571 break; 572 case BRW_OPCODE_OR: 573 brw_OR(p, dst, src[0], src[1]); 574 break; 575 case BRW_OPCODE_XOR: 576 brw_XOR(p, dst, src[0], src[1]); 577 break; 578 case BRW_OPCODE_NOT: 579 brw_NOT(p, dst, src[0]); 580 break; 581 case BRW_OPCODE_ASR: 582 brw_ASR(p, dst, src[0], src[1]); 583 break; 584 case BRW_OPCODE_SHR: 585 brw_SHR(p, dst, src[0], src[1]); 586 break; 587 case BRW_OPCODE_SHL: 588 brw_SHL(p, dst, src[0], src[1]); 589 break; 590 591 case BRW_OPCODE_CMP: 592 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 593 break; 594 case BRW_OPCODE_SEL: 595 brw_SEL(p, dst, src[0], src[1]); 596 break; 597 598 case BRW_OPCODE_DP4: 599 brw_DP4(p, dst, src[0], src[1]); 600 break; 601 602 case BRW_OPCODE_DP3: 603 brw_DP3(p, dst, src[0], src[1]); 604 break; 605 606 case BRW_OPCODE_DP2: 607 brw_DP2(p, dst, src[0], src[1]); 608 break; 609 610 case BRW_OPCODE_IF: 611 if (inst->src[0].file != BAD_FILE) { 612 /* The instruction has an embedded compare (only allowed on gen6) */ 613 assert(intel->gen == 6); 614 gen6_IF(p, inst->conditional_mod, src[0], src[1]); 615 } else { 616 struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8); 617 brw_inst->header.predicate_control = inst->predicate; 618 } 619 
if_depth_in_loop[loop_stack_depth]++; 620 break; 621 622 case BRW_OPCODE_ELSE: 623 brw_ELSE(p); 624 break; 625 case BRW_OPCODE_ENDIF: 626 brw_ENDIF(p); 627 if_depth_in_loop[loop_stack_depth]--; 628 break; 629 630 case BRW_OPCODE_DO: 631 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 632 if (loop_stack_array_size <= loop_stack_depth) { 633 loop_stack_array_size *= 2; 634 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, 635 loop_stack_array_size); 636 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, 637 loop_stack_array_size); 638 } 639 if_depth_in_loop[loop_stack_depth] = 0; 640 break; 641 642 case BRW_OPCODE_BREAK: 643 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 644 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 645 break; 646 case BRW_OPCODE_CONTINUE: 647 /* FINISHME: We need to write the loop instruction support still. */ 648 if (intel->gen >= 6) 649 gen6_CONT(p, loop_stack[loop_stack_depth - 1]); 650 else 651 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 652 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 653 break; 654 655 case BRW_OPCODE_WHILE: { 656 struct brw_instruction *inst0, *inst1; 657 GLuint br = 1; 658 659 if (intel->gen >= 5) 660 br = 2; 661 662 assert(loop_stack_depth > 0); 663 loop_stack_depth--; 664 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 665 if (intel->gen < 6) { 666 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 667 while (inst0 > loop_stack[loop_stack_depth]) { 668 inst0--; 669 if (inst0->header.opcode == BRW_OPCODE_BREAK && 670 inst0->bits3.if_else.jump_count == 0) { 671 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 672 } 673 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 674 inst0->bits3.if_else.jump_count == 0) { 675 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 676 } 677 } 678 } 679 } 680 break; 681 682 default: 683 generate_vs_instruction(inst, dst, src); 684 break; 685 } 686 687 if (unlikely(INTEL_DEBUG & 
DEBUG_VS)) { 688 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 689 if (0) { 690 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 691 ((uint32_t *)&p->store[i])[3], 692 ((uint32_t *)&p->store[i])[2], 693 ((uint32_t *)&p->store[i])[1], 694 ((uint32_t *)&p->store[i])[0]); 695 } 696 brw_disasm(stdout, &p->store[i], intel->gen); 697 } 698 } 699 700 last_native_inst = p->nr_insn; 701 } 702 703 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 704 printf("\n"); 705 } 706 707 ralloc_free(loop_stack); 708 ralloc_free(if_depth_in_loop); 709 710 brw_set_uip_jip(p); 711 712 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS 713 * emit issues, it doesn't get the jump distances into the output, 714 * which is often something we want to debug. So this is here in 715 * case you're doing that. 716 */ 717 if (0) { 718 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 719 for (unsigned int i = 0; i < p->nr_insn; i++) { 720 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 721 ((uint32_t *)&p->store[i])[3], 722 ((uint32_t *)&p->store[i])[2], 723 ((uint32_t *)&p->store[i])[1], 724 ((uint32_t *)&p->store[i])[0]); 725 brw_disasm(stdout, &p->store[i], intel->gen); 726 } 727 } 728 } 729} 730 731extern "C" { 732 733bool 734brw_vs_emit(struct brw_vs_compile *c) 735{ 736 struct brw_compile *p = &c->func; 737 struct brw_context *brw = p->brw; 738 struct intel_context *intel = &brw->intel; 739 struct gl_context *ctx = &intel->ctx; 740 struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram; 741 742 if (!prog) 743 return false; 744 745 struct brw_shader *shader = 746 (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; 747 if (!shader) 748 return false; 749 750 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 751 printf("GLSL IR for native vertex shader %d:\n", prog->Name); 752 _mesa_print_ir(shader->ir, NULL); 753 printf("\n\n"); 754 } 755 756 vec4_visitor v(c, prog, shader); 757 if (!v.run()) { 758 /* FINISHME: Cleanly fail, test at link time, etc. 
*/ 759 assert(!"not reached"); 760 return false; 761 } 762 763 return true; 764} 765 766} /* extern "C" */ 767 768} /* namespace brw */ 769