/* brw_vec4_emit.cpp — revision abf843a797876b5e3c5c91dbec25b6553d2cc281 */
1/* Copyright © 2011 Intel Corporation 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a 4 * copy of this software and associated documentation files (the "Software"), 5 * to deal in the Software without restriction, including without limitation 6 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 * and/or sell copies of the Software, and to permit persons to whom the 8 * Software is furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice (including the next 11 * paragraph) shall be included in all copies or substantial portions of the 12 * Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 * IN THE SOFTWARE. 
21 */ 22 23#include "brw_vec4.h" 24#include "../glsl/ir_print_visitor.h" 25 26extern "C" { 27#include "brw_eu.h" 28}; 29 30using namespace brw; 31 32namespace brw { 33 34int 35vec4_visitor::setup_attributes(int payload_reg) 36{ 37 int nr_attributes; 38 int attribute_map[VERT_ATTRIB_MAX]; 39 40 nr_attributes = 0; 41 for (int i = 0; i < VERT_ATTRIB_MAX; i++) { 42 if (prog_data->inputs_read & BITFIELD64_BIT(i)) { 43 attribute_map[i] = payload_reg + nr_attributes; 44 nr_attributes++; 45 } 46 } 47 48 foreach_iter(exec_list_iterator, iter, this->instructions) { 49 vec4_instruction *inst = (vec4_instruction *)iter.get(); 50 51 for (int i = 0; i < 3; i++) { 52 if (inst->src[i].file != ATTR) 53 continue; 54 55 inst->src[i].file = HW_REG; 56 inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0); 57 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; 58 } 59 } 60 61 /* The BSpec says we always have to read at least one thing from 62 * the VF, and it appears that the hardware wedges otherwise. 63 */ 64 if (nr_attributes == 0) 65 nr_attributes = 1; 66 67 prog_data->urb_read_length = (nr_attributes + 1) / 2; 68 69 return payload_reg + nr_attributes; 70} 71 72int 73vec4_visitor::setup_uniforms(int reg) 74{ 75 /* User clip planes from curbe: 76 */ 77 if (c->key.nr_userclip) { 78 if (intel->gen >= 6) { 79 for (int i = 0; i < c->key.nr_userclip; i++) { 80 c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, 81 (i % 2) * 4), 0, 4, 1); 82 } 83 reg += ALIGN(c->key.nr_userclip, 2) / 2; 84 } else { 85 for (int i = 0; i < c->key.nr_userclip; i++) { 86 c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, 87 (i % 2) * 4), 0, 4, 1); 88 } 89 reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; 90 } 91 } 92 93 /* The pre-gen6 VS requires that some push constants get loaded no 94 * matter what, or the GPU would hang. 
95 */ 96 if (intel->gen < 6 && this->uniforms == 0) { 97 this->uniform_size[this->uniforms] = 1; 98 99 for (unsigned int i = 0; i < 4; i++) { 100 unsigned int slot = this->uniforms * 4 + i; 101 102 c->prog_data.param[slot] = NULL; 103 c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO; 104 } 105 106 this->uniforms++; 107 reg++; 108 } else { 109 reg += ALIGN(uniforms, 2) / 2; 110 } 111 112 /* for now, we are not doing any elimination of unused slots, nor 113 * are we packing our uniforms. 114 */ 115 c->prog_data.nr_params = this->uniforms * 4; 116 117 c->prog_data.curb_read_length = reg - 1; 118 c->prog_data.uses_new_param_layout = true; 119 120 return reg; 121} 122 123void 124vec4_visitor::setup_payload(void) 125{ 126 int reg = 0; 127 128 /* The payload always contains important data in g0, which contains 129 * the URB handles that are passed on to the URB write at the end 130 * of the thread. So, we always start push constants at g1. 131 */ 132 reg++; 133 134 reg = setup_uniforms(reg); 135 136 reg = setup_attributes(reg); 137 138 this->first_non_payload_grf = reg; 139} 140 141struct brw_reg 142vec4_instruction::get_dst(void) 143{ 144 struct brw_reg brw_reg; 145 146 switch (dst.file) { 147 case GRF: 148 brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); 149 brw_reg = retype(brw_reg, dst.type); 150 brw_reg.dw1.bits.writemask = dst.writemask; 151 break; 152 153 case HW_REG: 154 brw_reg = dst.fixed_hw_reg; 155 break; 156 157 case BAD_FILE: 158 brw_reg = brw_null_reg(); 159 break; 160 161 default: 162 assert(!"not reached"); 163 brw_reg = brw_null_reg(); 164 break; 165 } 166 return brw_reg; 167} 168 169struct brw_reg 170vec4_instruction::get_src(int i) 171{ 172 struct brw_reg brw_reg; 173 174 switch (src[i].file) { 175 case GRF: 176 brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); 177 brw_reg = retype(brw_reg, src[i].type); 178 brw_reg.dw1.bits.swizzle = src[i].swizzle; 179 if (src[i].abs) 180 brw_reg = brw_abs(brw_reg); 181 if (src[i].negate) 182 
brw_reg = negate(brw_reg); 183 break; 184 185 case IMM: 186 switch (src[i].type) { 187 case BRW_REGISTER_TYPE_F: 188 brw_reg = brw_imm_f(src[i].imm.f); 189 break; 190 case BRW_REGISTER_TYPE_D: 191 brw_reg = brw_imm_d(src[i].imm.i); 192 break; 193 case BRW_REGISTER_TYPE_UD: 194 brw_reg = brw_imm_ud(src[i].imm.u); 195 break; 196 default: 197 assert(!"not reached"); 198 brw_reg = brw_null_reg(); 199 break; 200 } 201 break; 202 203 case UNIFORM: 204 brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2, 205 ((src[i].reg + src[i].reg_offset) % 2) * 4), 206 0, 4, 1); 207 brw_reg = retype(brw_reg, src[i].type); 208 brw_reg.dw1.bits.swizzle = src[i].swizzle; 209 if (src[i].abs) 210 brw_reg = brw_abs(brw_reg); 211 if (src[i].negate) 212 brw_reg = negate(brw_reg); 213 break; 214 215 case HW_REG: 216 brw_reg = src[i].fixed_hw_reg; 217 break; 218 219 case BAD_FILE: 220 /* Probably unused. */ 221 brw_reg = brw_null_reg(); 222 break; 223 case ATTR: 224 default: 225 assert(!"not reached"); 226 brw_reg = brw_null_reg(); 227 break; 228 } 229 230 return brw_reg; 231} 232 233void 234vec4_visitor::generate_math1_gen4(vec4_instruction *inst, 235 struct brw_reg dst, 236 struct brw_reg src) 237{ 238 brw_math(p, 239 dst, 240 brw_math_function(inst->opcode), 241 BRW_MATH_SATURATE_NONE, 242 inst->base_mrf, 243 src, 244 BRW_MATH_DATA_SCALAR, 245 BRW_MATH_PRECISION_FULL); 246} 247 248static void 249check_gen6_math_src_arg(struct brw_reg src) 250{ 251 /* Source swizzles are ignored. */ 252 assert(!src.abs); 253 assert(!src.negate); 254 assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW); 255} 256 257void 258vec4_visitor::generate_math1_gen6(vec4_instruction *inst, 259 struct brw_reg dst, 260 struct brw_reg src) 261{ 262 /* Can't do writemask because math can't be align16. 
*/ 263 assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); 264 check_gen6_math_src_arg(src); 265 266 brw_set_access_mode(p, BRW_ALIGN_1); 267 brw_math(p, 268 dst, 269 brw_math_function(inst->opcode), 270 BRW_MATH_SATURATE_NONE, 271 inst->base_mrf, 272 src, 273 BRW_MATH_DATA_SCALAR, 274 BRW_MATH_PRECISION_FULL); 275 brw_set_access_mode(p, BRW_ALIGN_16); 276} 277 278void 279vec4_visitor::generate_math2_gen6(vec4_instruction *inst, 280 struct brw_reg dst, 281 struct brw_reg src0, 282 struct brw_reg src1) 283{ 284 /* Can't do writemask because math can't be align16. */ 285 assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); 286 /* Source swizzles are ignored. */ 287 check_gen6_math_src_arg(src0); 288 check_gen6_math_src_arg(src1); 289 290 brw_set_access_mode(p, BRW_ALIGN_1); 291 brw_math2(p, 292 dst, 293 brw_math_function(inst->opcode), 294 src0, src1); 295 brw_set_access_mode(p, BRW_ALIGN_16); 296} 297 298void 299vec4_visitor::generate_math2_gen4(vec4_instruction *inst, 300 struct brw_reg dst, 301 struct brw_reg src0, 302 struct brw_reg src1) 303{ 304 /* Can't do writemask because math can't be align16. 
*/ 305 assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); 306 307 brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1); 308 309 brw_set_access_mode(p, BRW_ALIGN_1); 310 brw_math(p, 311 dst, 312 brw_math_function(inst->opcode), 313 BRW_MATH_SATURATE_NONE, 314 inst->base_mrf, 315 src0, 316 BRW_MATH_DATA_VECTOR, 317 BRW_MATH_PRECISION_FULL); 318 brw_set_access_mode(p, BRW_ALIGN_16); 319} 320 321void 322vec4_visitor::generate_urb_write(vec4_instruction *inst) 323{ 324 brw_urb_WRITE(p, 325 brw_null_reg(), /* dest */ 326 inst->base_mrf, /* starting mrf reg nr */ 327 brw_vec8_grf(0, 0), /* src */ 328 false, /* allocate */ 329 true, /* used */ 330 inst->mlen, 331 0, /* response len */ 332 inst->eot, /* eot */ 333 inst->eot, /* writes complete */ 334 inst->offset, /* urb destination offset */ 335 BRW_URB_SWIZZLE_INTERLEAVE); 336} 337 338void 339vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1, 340 struct brw_reg index) 341{ 342 int second_vertex_offset; 343 344 if (intel->gen >= 6) 345 second_vertex_offset = 1; 346 else 347 second_vertex_offset = 16; 348 349 m1 = retype(m1, BRW_REGISTER_TYPE_D); 350 351 /* Set up M1 (message payload). Only the block offsets in M1.0 and 352 * M1.4 are used, and the rest are ignored. 
353 */ 354 struct brw_reg m1_0 = suboffset(vec1(m1), 0); 355 struct brw_reg m1_4 = suboffset(vec1(m1), 4); 356 struct brw_reg index_0 = suboffset(vec1(index), 0); 357 struct brw_reg index_4 = suboffset(vec1(index), 4); 358 359 brw_push_insn_state(p); 360 brw_set_mask_control(p, BRW_MASK_DISABLE); 361 brw_set_access_mode(p, BRW_ALIGN_1); 362 363 brw_MOV(p, m1_0, index_0); 364 365 brw_set_predicate_inverse(p, true); 366 if (index.file == BRW_IMMEDIATE_VALUE) { 367 index_4.dw1.ud++; 368 brw_MOV(p, m1_4, index_4); 369 } else { 370 brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset)); 371 } 372 373 brw_pop_insn_state(p); 374} 375 376void 377vec4_visitor::generate_scratch_read(vec4_instruction *inst, 378 struct brw_reg dst, 379 struct brw_reg index) 380{ 381 if (intel->gen >= 6) { 382 brw_push_insn_state(p); 383 brw_set_mask_control(p, BRW_MASK_DISABLE); 384 brw_MOV(p, 385 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), 386 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); 387 brw_pop_insn_state(p); 388 } 389 390 generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), 391 index); 392 393 uint32_t msg_type; 394 395 if (intel->gen >= 6) 396 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 397 else if (intel->gen == 5 || intel->is_g4x) 398 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 399 else 400 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 401 402 /* Each of the 8 channel enables is considered for whether each 403 * dword is written. 
404 */ 405 struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); 406 brw_set_dest(p, send, dst); 407 brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); 408 brw_set_dp_read_message(p, send, 409 255, /* binding table index: stateless access */ 410 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 411 msg_type, 412 BRW_DATAPORT_READ_TARGET_RENDER_CACHE, 413 2, /* mlen */ 414 1 /* rlen */); 415} 416 417void 418vec4_visitor::generate_scratch_write(vec4_instruction *inst, 419 struct brw_reg dst, 420 struct brw_reg src, 421 struct brw_reg index) 422{ 423 /* If the instruction is predicated, we'll predicate the send, not 424 * the header setup. 425 */ 426 brw_set_predicate_control(p, false); 427 428 if (intel->gen >= 6) { 429 brw_push_insn_state(p); 430 brw_set_mask_control(p, BRW_MASK_DISABLE); 431 brw_MOV(p, 432 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), 433 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); 434 brw_pop_insn_state(p); 435 } 436 437 generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), 438 index); 439 440 brw_MOV(p, 441 retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), 442 retype(src, BRW_REGISTER_TYPE_D)); 443 444 uint32_t msg_type; 445 446 if (intel->gen >= 6) 447 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; 448 else 449 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; 450 451 brw_set_predicate_control(p, inst->predicate); 452 453 /* Each of the 8 channel enables is considered for whether each 454 * dword is written. 
455 */ 456 struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); 457 brw_set_dest(p, send, dst); 458 brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); 459 brw_set_dp_write_message(p, send, 460 255, /* binding table index: stateless access */ 461 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 462 msg_type, 463 3, /* mlen */ 464 true, /* header present */ 465 false, /* pixel scoreboard */ 466 0, /* rlen */ 467 false, /* eot */ 468 false /* commit */); 469} 470 471void 472vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, 473 struct brw_reg dst, 474 struct brw_reg *src) 475{ 476 vec4_instruction *inst = (vec4_instruction *)instruction; 477 478 switch (inst->opcode) { 479 case SHADER_OPCODE_RCP: 480 case SHADER_OPCODE_RSQ: 481 case SHADER_OPCODE_SQRT: 482 case SHADER_OPCODE_EXP2: 483 case SHADER_OPCODE_LOG2: 484 case SHADER_OPCODE_SIN: 485 case SHADER_OPCODE_COS: 486 if (intel->gen >= 6) { 487 generate_math1_gen6(inst, dst, src[0]); 488 } else { 489 generate_math1_gen4(inst, dst, src[0]); 490 } 491 break; 492 493 case SHADER_OPCODE_POW: 494 if (intel->gen >= 6) { 495 generate_math2_gen6(inst, dst, src[0], src[1]); 496 } else { 497 generate_math2_gen4(inst, dst, src[0], src[1]); 498 } 499 break; 500 501 case VS_OPCODE_URB_WRITE: 502 generate_urb_write(inst); 503 break; 504 505 case VS_OPCODE_SCRATCH_READ: 506 generate_scratch_read(inst, dst, src[0]); 507 break; 508 509 case VS_OPCODE_SCRATCH_WRITE: 510 generate_scratch_write(inst, dst, src[0], src[1]); 511 break; 512 513 default: 514 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 515 fail("unsupported opcode in `%s' in VS\n", 516 brw_opcodes[inst->opcode].name); 517 } else { 518 fail("Unsupported opcode %d in VS", inst->opcode); 519 } 520 } 521} 522 523bool 524vec4_visitor::run() 525{ 526 /* Generate VS IR for main(). (the visitor only descends into 527 * functions called "main"). 
528 */ 529 foreach_iter(exec_list_iterator, iter, *shader->ir) { 530 ir_instruction *ir = (ir_instruction *)iter.get(); 531 base_ir = ir; 532 ir->accept(this); 533 } 534 535 emit_urb_writes(); 536 537 /* Before any optimization, push array accesses out to scratch 538 * space where we need them to be. This pass may allocate new 539 * virtual GRFs, so we want to do it early. It also makes sure 540 * that we have reladdr computations available for CSE, since we'll 541 * often do repeated subexpressions for those. 542 */ 543 move_grf_array_access_to_scratch(); 544 545 if (failed) 546 return false; 547 548 setup_payload(); 549 reg_allocate(); 550 551 brw_set_access_mode(p, BRW_ALIGN_16); 552 553 generate_code(); 554 555 return !failed; 556} 557 558void 559vec4_visitor::generate_code() 560{ 561 int last_native_inst = p->nr_insn; 562 const char *last_annotation_string = NULL; 563 ir_instruction *last_annotation_ir = NULL; 564 565 int loop_stack_array_size = 16; 566 int loop_stack_depth = 0; 567 brw_instruction **loop_stack = 568 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); 569 int *if_depth_in_loop = 570 rzalloc_array(this->mem_ctx, int, loop_stack_array_size); 571 572 573 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 574 printf("Native code for vertex shader %d:\n", prog->Name); 575 } 576 577 foreach_list(node, &this->instructions) { 578 vec4_instruction *inst = (vec4_instruction *)node; 579 struct brw_reg src[3], dst; 580 581 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 582 if (last_annotation_ir != inst->ir) { 583 last_annotation_ir = inst->ir; 584 if (last_annotation_ir) { 585 printf(" "); 586 last_annotation_ir->print(); 587 printf("\n"); 588 } 589 } 590 if (last_annotation_string != inst->annotation) { 591 last_annotation_string = inst->annotation; 592 if (last_annotation_string) 593 printf(" %s\n", last_annotation_string); 594 } 595 } 596 597 for (unsigned int i = 0; i < 3; i++) { 598 src[i] = inst->get_src(i); 599 } 600 dst = inst->get_dst(); 601 
602 brw_set_conditionalmod(p, inst->conditional_mod); 603 brw_set_predicate_control(p, inst->predicate); 604 brw_set_predicate_inverse(p, inst->predicate_inverse); 605 brw_set_saturate(p, inst->saturate); 606 607 switch (inst->opcode) { 608 case BRW_OPCODE_MOV: 609 brw_MOV(p, dst, src[0]); 610 break; 611 case BRW_OPCODE_ADD: 612 brw_ADD(p, dst, src[0], src[1]); 613 break; 614 case BRW_OPCODE_MUL: 615 brw_MUL(p, dst, src[0], src[1]); 616 break; 617 618 case BRW_OPCODE_FRC: 619 brw_FRC(p, dst, src[0]); 620 break; 621 case BRW_OPCODE_RNDD: 622 brw_RNDD(p, dst, src[0]); 623 break; 624 case BRW_OPCODE_RNDE: 625 brw_RNDE(p, dst, src[0]); 626 break; 627 case BRW_OPCODE_RNDZ: 628 brw_RNDZ(p, dst, src[0]); 629 break; 630 631 case BRW_OPCODE_AND: 632 brw_AND(p, dst, src[0], src[1]); 633 break; 634 case BRW_OPCODE_OR: 635 brw_OR(p, dst, src[0], src[1]); 636 break; 637 case BRW_OPCODE_XOR: 638 brw_XOR(p, dst, src[0], src[1]); 639 break; 640 case BRW_OPCODE_NOT: 641 brw_NOT(p, dst, src[0]); 642 break; 643 case BRW_OPCODE_ASR: 644 brw_ASR(p, dst, src[0], src[1]); 645 break; 646 case BRW_OPCODE_SHR: 647 brw_SHR(p, dst, src[0], src[1]); 648 break; 649 case BRW_OPCODE_SHL: 650 brw_SHL(p, dst, src[0], src[1]); 651 break; 652 653 case BRW_OPCODE_CMP: 654 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 655 break; 656 case BRW_OPCODE_SEL: 657 brw_SEL(p, dst, src[0], src[1]); 658 break; 659 660 case BRW_OPCODE_DP4: 661 brw_DP4(p, dst, src[0], src[1]); 662 break; 663 664 case BRW_OPCODE_DP3: 665 brw_DP3(p, dst, src[0], src[1]); 666 break; 667 668 case BRW_OPCODE_DP2: 669 brw_DP2(p, dst, src[0], src[1]); 670 break; 671 672 case BRW_OPCODE_IF: 673 if (inst->src[0].file != BAD_FILE) { 674 /* The instruction has an embedded compare (only allowed on gen6) */ 675 assert(intel->gen == 6); 676 gen6_IF(p, inst->conditional_mod, src[0], src[1]); 677 } else { 678 struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8); 679 brw_inst->header.predicate_control = inst->predicate; 680 } 681 
if_depth_in_loop[loop_stack_depth]++; 682 break; 683 684 case BRW_OPCODE_ELSE: 685 brw_ELSE(p); 686 break; 687 case BRW_OPCODE_ENDIF: 688 brw_ENDIF(p); 689 if_depth_in_loop[loop_stack_depth]--; 690 break; 691 692 case BRW_OPCODE_DO: 693 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 694 if (loop_stack_array_size <= loop_stack_depth) { 695 loop_stack_array_size *= 2; 696 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, 697 loop_stack_array_size); 698 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, 699 loop_stack_array_size); 700 } 701 if_depth_in_loop[loop_stack_depth] = 0; 702 break; 703 704 case BRW_OPCODE_BREAK: 705 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 706 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 707 break; 708 case BRW_OPCODE_CONTINUE: 709 /* FINISHME: We need to write the loop instruction support still. */ 710 if (intel->gen >= 6) 711 gen6_CONT(p, loop_stack[loop_stack_depth - 1]); 712 else 713 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 714 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 715 break; 716 717 case BRW_OPCODE_WHILE: { 718 struct brw_instruction *inst0, *inst1; 719 GLuint br = 1; 720 721 if (intel->gen >= 5) 722 br = 2; 723 724 assert(loop_stack_depth > 0); 725 loop_stack_depth--; 726 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 727 if (intel->gen < 6) { 728 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 729 while (inst0 > loop_stack[loop_stack_depth]) { 730 inst0--; 731 if (inst0->header.opcode == BRW_OPCODE_BREAK && 732 inst0->bits3.if_else.jump_count == 0) { 733 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 734 } 735 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 736 inst0->bits3.if_else.jump_count == 0) { 737 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 738 } 739 } 740 } 741 } 742 break; 743 744 default: 745 generate_vs_instruction(inst, dst, src); 746 break; 747 } 748 749 if (unlikely(INTEL_DEBUG & 
DEBUG_VS)) { 750 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 751 if (0) { 752 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 753 ((uint32_t *)&p->store[i])[3], 754 ((uint32_t *)&p->store[i])[2], 755 ((uint32_t *)&p->store[i])[1], 756 ((uint32_t *)&p->store[i])[0]); 757 } 758 brw_disasm(stdout, &p->store[i], intel->gen); 759 } 760 } 761 762 last_native_inst = p->nr_insn; 763 } 764 765 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 766 printf("\n"); 767 } 768 769 ralloc_free(loop_stack); 770 ralloc_free(if_depth_in_loop); 771 772 brw_set_uip_jip(p); 773 774 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS 775 * emit issues, it doesn't get the jump distances into the output, 776 * which is often something we want to debug. So this is here in 777 * case you're doing that. 778 */ 779 if (0) { 780 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 781 for (unsigned int i = 0; i < p->nr_insn; i++) { 782 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 783 ((uint32_t *)&p->store[i])[3], 784 ((uint32_t *)&p->store[i])[2], 785 ((uint32_t *)&p->store[i])[1], 786 ((uint32_t *)&p->store[i])[0]); 787 brw_disasm(stdout, &p->store[i], intel->gen); 788 } 789 } 790 } 791} 792 793extern "C" { 794 795bool 796brw_vs_emit(struct brw_vs_compile *c) 797{ 798 struct brw_compile *p = &c->func; 799 struct brw_context *brw = p->brw; 800 struct intel_context *intel = &brw->intel; 801 struct gl_context *ctx = &intel->ctx; 802 struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram; 803 804 if (!prog) 805 return false; 806 807 struct brw_shader *shader = 808 (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; 809 if (!shader) 810 return false; 811 812 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 813 printf("GLSL IR for native vertex shader %d:\n", prog->Name); 814 _mesa_print_ir(shader->ir, NULL); 815 printf("\n\n"); 816 } 817 818 vec4_visitor v(c, prog, shader); 819 if (!v.run()) { 820 /* FINISHME: Cleanly fail, test at link time, etc. 
*/ 821 assert(!"not reached"); 822 return false; 823 } 824 825 return true; 826} 827 828} /* extern "C" */ 829 830} /* namespace brw */ 831