brw_vec4.cpp revision b2f5d4c3ec9ec2fec8b39c87eb00121a24107276
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_vec4.h" 25extern "C" { 26#include "main/macros.h" 27#include "program/prog_parameter.h" 28} 29 30#define MAX_INSTRUCTION (1 << 30) 31 32namespace brw { 33 34/** 35 * Common helper for constructing swizzles. When only a subset of 36 * channels of a vec4 are used, we don't want to reference the other 37 * channels, as that will tell optimization passes that those other 38 * channels are used. 
39 */ 40unsigned 41swizzle_for_size(int size) 42{ 43 static const unsigned size_swizzles[4] = { 44 BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 45 BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 46 BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 47 BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 48 }; 49 50 assert((size >= 1) && (size <= 4)); 51 return size_swizzles[size - 1]; 52} 53 54void 55src_reg::init() 56{ 57 memset(this, 0, sizeof(*this)); 58 59 this->file = BAD_FILE; 60} 61 62src_reg::src_reg(register_file file, int reg, const glsl_type *type) 63{ 64 init(); 65 66 this->file = file; 67 this->reg = reg; 68 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 69 this->swizzle = swizzle_for_size(type->vector_elements); 70 else 71 this->swizzle = SWIZZLE_XYZW; 72} 73 74/** Generic unset register constructor. */ 75src_reg::src_reg() 76{ 77 init(); 78} 79 80src_reg::src_reg(float f) 81{ 82 init(); 83 84 this->file = IMM; 85 this->type = BRW_REGISTER_TYPE_F; 86 this->imm.f = f; 87} 88 89src_reg::src_reg(uint32_t u) 90{ 91 init(); 92 93 this->file = IMM; 94 this->type = BRW_REGISTER_TYPE_UD; 95 this->imm.u = u; 96} 97 98src_reg::src_reg(int32_t i) 99{ 100 init(); 101 102 this->file = IMM; 103 this->type = BRW_REGISTER_TYPE_D; 104 this->imm.i = i; 105} 106 107bool 108vec4_instruction::is_tex() 109{ 110 return (opcode == SHADER_OPCODE_TEX || 111 opcode == SHADER_OPCODE_TXD || 112 opcode == SHADER_OPCODE_TXF || 113 opcode == SHADER_OPCODE_TXL || 114 opcode == SHADER_OPCODE_TXS); 115} 116 117void 118dst_reg::init() 119{ 120 memset(this, 0, sizeof(*this)); 121 this->file = BAD_FILE; 122 this->writemask = WRITEMASK_XYZW; 123} 124 125dst_reg::dst_reg() 126{ 127 init(); 128} 129 130dst_reg::dst_reg(register_file file, int reg) 131{ 132 init(); 133 134 this->file = file; 135 this->reg = reg; 136} 137 138dst_reg::dst_reg(register_file file, int reg, const glsl_type *type, 139 int writemask) 140{ 141 init(); 142 
143 this->file = file; 144 this->reg = reg; 145 this->type = brw_type_for_base_type(type); 146 this->writemask = writemask; 147} 148 149dst_reg::dst_reg(struct brw_reg reg) 150{ 151 init(); 152 153 this->file = HW_REG; 154 this->fixed_hw_reg = reg; 155} 156 157bool 158vec4_instruction::is_math() 159{ 160 return (opcode == SHADER_OPCODE_RCP || 161 opcode == SHADER_OPCODE_RSQ || 162 opcode == SHADER_OPCODE_SQRT || 163 opcode == SHADER_OPCODE_EXP2 || 164 opcode == SHADER_OPCODE_LOG2 || 165 opcode == SHADER_OPCODE_SIN || 166 opcode == SHADER_OPCODE_COS || 167 opcode == SHADER_OPCODE_INT_QUOTIENT || 168 opcode == SHADER_OPCODE_INT_REMAINDER || 169 opcode == SHADER_OPCODE_POW); 170} 171/** 172 * Returns how many MRFs an opcode will write over. 173 * 174 * Note that this is not the 0 or 1 implied writes in an actual gen 175 * instruction -- the generate_* functions generate additional MOVs 176 * for setup. 177 */ 178int 179vec4_visitor::implied_mrf_writes(vec4_instruction *inst) 180{ 181 if (inst->mlen == 0) 182 return 0; 183 184 switch (inst->opcode) { 185 case SHADER_OPCODE_RCP: 186 case SHADER_OPCODE_RSQ: 187 case SHADER_OPCODE_SQRT: 188 case SHADER_OPCODE_EXP2: 189 case SHADER_OPCODE_LOG2: 190 case SHADER_OPCODE_SIN: 191 case SHADER_OPCODE_COS: 192 return 1; 193 case SHADER_OPCODE_POW: 194 return 2; 195 case VS_OPCODE_URB_WRITE: 196 return 1; 197 case VS_OPCODE_PULL_CONSTANT_LOAD: 198 return 2; 199 case VS_OPCODE_SCRATCH_READ: 200 return 2; 201 case VS_OPCODE_SCRATCH_WRITE: 202 return 3; 203 default: 204 assert(!"not reached"); 205 return inst->mlen; 206 } 207} 208 209bool 210src_reg::equals(src_reg *r) 211{ 212 return (file == r->file && 213 reg == r->reg && 214 reg_offset == r->reg_offset && 215 type == r->type && 216 negate == r->negate && 217 abs == r->abs && 218 swizzle == r->swizzle && 219 !reladdr && !r->reladdr && 220 memcmp(&fixed_hw_reg, &r->fixed_hw_reg, 221 sizeof(fixed_hw_reg)) == 0 && 222 imm.u == r->imm.u); 223} 224 225void 
226vec4_visitor::calculate_live_intervals() 227{ 228 int *def = ralloc_array(mem_ctx, int, virtual_grf_count); 229 int *use = ralloc_array(mem_ctx, int, virtual_grf_count); 230 int loop_depth = 0; 231 int loop_start = 0; 232 233 if (this->live_intervals_valid) 234 return; 235 236 for (int i = 0; i < virtual_grf_count; i++) { 237 def[i] = MAX_INSTRUCTION; 238 use[i] = -1; 239 } 240 241 int ip = 0; 242 foreach_list(node, &this->instructions) { 243 vec4_instruction *inst = (vec4_instruction *)node; 244 245 if (inst->opcode == BRW_OPCODE_DO) { 246 if (loop_depth++ == 0) 247 loop_start = ip; 248 } else if (inst->opcode == BRW_OPCODE_WHILE) { 249 loop_depth--; 250 251 if (loop_depth == 0) { 252 /* Patches up the use of vars marked for being live across 253 * the whole loop. 254 */ 255 for (int i = 0; i < virtual_grf_count; i++) { 256 if (use[i] == loop_start) { 257 use[i] = ip; 258 } 259 } 260 } 261 } else { 262 for (unsigned int i = 0; i < 3; i++) { 263 if (inst->src[i].file == GRF) { 264 int reg = inst->src[i].reg; 265 266 if (!loop_depth) { 267 use[reg] = ip; 268 } else { 269 def[reg] = MIN2(loop_start, def[reg]); 270 use[reg] = loop_start; 271 272 /* Nobody else is going to go smash our start to 273 * later in the loop now, because def[reg] now 274 * points before the bb header. 
275 */ 276 } 277 } 278 } 279 if (inst->dst.file == GRF) { 280 int reg = inst->dst.reg; 281 282 if (!loop_depth) { 283 def[reg] = MIN2(def[reg], ip); 284 } else { 285 def[reg] = MIN2(def[reg], loop_start); 286 } 287 } 288 } 289 290 ip++; 291 } 292 293 ralloc_free(this->virtual_grf_def); 294 ralloc_free(this->virtual_grf_use); 295 this->virtual_grf_def = def; 296 this->virtual_grf_use = use; 297 298 this->live_intervals_valid = true; 299} 300 301bool 302vec4_visitor::virtual_grf_interferes(int a, int b) 303{ 304 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); 305 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); 306 307 /* We can't handle dead register writes here, without iterating 308 * over the whole instruction stream to find every single dead 309 * write to that register to compare to the live interval of the 310 * other register. Just assert that dead_code_eliminate() has been 311 * called. 312 */ 313 assert((this->virtual_grf_use[a] != -1 || 314 this->virtual_grf_def[a] == MAX_INSTRUCTION) && 315 (this->virtual_grf_use[b] != -1 || 316 this->virtual_grf_def[b] == MAX_INSTRUCTION)); 317 318 return start < end; 319} 320 321/** 322 * Must be called after calculate_live_intervales() to remove unused 323 * writes to registers -- register allocation will fail otherwise 324 * because something deffed but not used won't be considered to 325 * interfere with other regs. 
326 */ 327bool 328vec4_visitor::dead_code_eliminate() 329{ 330 bool progress = false; 331 int pc = 0; 332 333 calculate_live_intervals(); 334 335 foreach_list_safe(node, &this->instructions) { 336 vec4_instruction *inst = (vec4_instruction *)node; 337 338 if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { 339 inst->remove(); 340 progress = true; 341 } 342 343 pc++; 344 } 345 346 if (progress) 347 live_intervals_valid = false; 348 349 return progress; 350} 351 352void 353vec4_visitor::split_uniform_registers() 354{ 355 /* Prior to this, uniforms have been in an array sized according to 356 * the number of vector uniforms present, sparsely filled (so an 357 * aggregate results in reg indices being skipped over). Now we're 358 * going to cut those aggregates up so each .reg index is one 359 * vector. The goal is to make elimination of unused uniform 360 * components easier later. 361 */ 362 foreach_list(node, &this->instructions) { 363 vec4_instruction *inst = (vec4_instruction *)node; 364 365 for (int i = 0 ; i < 3; i++) { 366 if (inst->src[i].file != UNIFORM) 367 continue; 368 369 assert(!inst->src[i].reladdr); 370 371 inst->src[i].reg += inst->src[i].reg_offset; 372 inst->src[i].reg_offset = 0; 373 } 374 } 375 376 /* Update that everything is now vector-sized. */ 377 for (int i = 0; i < this->uniforms; i++) { 378 this->uniform_size[i] = 1; 379 } 380} 381 382void 383vec4_visitor::pack_uniform_registers() 384{ 385 bool uniform_used[this->uniforms]; 386 int new_loc[this->uniforms]; 387 int new_chan[this->uniforms]; 388 389 memset(uniform_used, 0, sizeof(uniform_used)); 390 memset(new_loc, 0, sizeof(new_loc)); 391 memset(new_chan, 0, sizeof(new_chan)); 392 393 /* Find which uniform vectors are actually used by the program. We 394 * expect unused vector elements when we've moved array access out 395 * to pull constants, and from some GLSL code generators like wine. 
396 */ 397 foreach_list(node, &this->instructions) { 398 vec4_instruction *inst = (vec4_instruction *)node; 399 400 for (int i = 0 ; i < 3; i++) { 401 if (inst->src[i].file != UNIFORM) 402 continue; 403 404 uniform_used[inst->src[i].reg] = true; 405 } 406 } 407 408 int new_uniform_count = 0; 409 410 /* Now, figure out a packing of the live uniform vectors into our 411 * push constants. 412 */ 413 for (int src = 0; src < uniforms; src++) { 414 int size = this->uniform_vector_size[src]; 415 416 if (!uniform_used[src]) { 417 this->uniform_vector_size[src] = 0; 418 continue; 419 } 420 421 int dst; 422 /* Find the lowest place we can slot this uniform in. */ 423 for (dst = 0; dst < src; dst++) { 424 if (this->uniform_vector_size[dst] + size <= 4) 425 break; 426 } 427 428 if (src == dst) { 429 new_loc[src] = dst; 430 new_chan[src] = 0; 431 } else { 432 new_loc[src] = dst; 433 new_chan[src] = this->uniform_vector_size[dst]; 434 435 /* Move the references to the data */ 436 for (int j = 0; j < size; j++) { 437 c->prog_data.param[dst * 4 + new_chan[src] + j] = 438 c->prog_data.param[src * 4 + j]; 439 } 440 441 this->uniform_vector_size[dst] += size; 442 this->uniform_vector_size[src] = 0; 443 } 444 445 new_uniform_count = MAX2(new_uniform_count, dst + 1); 446 } 447 448 this->uniforms = new_uniform_count; 449 450 /* Now, update the instructions for our repacked uniforms. 
*/ 451 foreach_list(node, &this->instructions) { 452 vec4_instruction *inst = (vec4_instruction *)node; 453 454 for (int i = 0 ; i < 3; i++) { 455 int src = inst->src[i].reg; 456 457 if (inst->src[i].file != UNIFORM) 458 continue; 459 460 inst->src[i].reg = new_loc[src]; 461 462 int sx = BRW_GET_SWZ(inst->src[i].swizzle, 0) + new_chan[src]; 463 int sy = BRW_GET_SWZ(inst->src[i].swizzle, 1) + new_chan[src]; 464 int sz = BRW_GET_SWZ(inst->src[i].swizzle, 2) + new_chan[src]; 465 int sw = BRW_GET_SWZ(inst->src[i].swizzle, 3) + new_chan[src]; 466 inst->src[i].swizzle = BRW_SWIZZLE4(sx, sy, sz, sw); 467 } 468 } 469} 470 471bool 472src_reg::is_zero() const 473{ 474 if (file != IMM) 475 return false; 476 477 if (type == BRW_REGISTER_TYPE_F) { 478 return imm.f == 0.0; 479 } else { 480 return imm.i == 0; 481 } 482} 483 484bool 485src_reg::is_one() const 486{ 487 if (file != IMM) 488 return false; 489 490 if (type == BRW_REGISTER_TYPE_F) { 491 return imm.f == 1.0; 492 } else { 493 return imm.i == 1; 494 } 495} 496 497/** 498 * Does algebraic optimizations (0 * a = 0, 1 * a = a, a + 0 = a). 499 * 500 * While GLSL IR also performs this optimization, we end up with it in 501 * our instruction stream for a couple of reasons. One is that we 502 * sometimes generate silly instructions, for example in array access 503 * where we'll generate "ADD offset, index, base" even if base is 0. 504 * The other is that GLSL IR's constant propagation doesn't track the 505 * components of aggregates, so some VS patterns (initialize matrix to 506 * 0, accumulate in vertex blending factors) end up breaking down to 507 * instructions involving 0. 
508 */ 509bool 510vec4_visitor::opt_algebraic() 511{ 512 bool progress = false; 513 514 foreach_list(node, &this->instructions) { 515 vec4_instruction *inst = (vec4_instruction *)node; 516 517 switch (inst->opcode) { 518 case BRW_OPCODE_ADD: 519 if (inst->src[1].is_zero()) { 520 inst->opcode = BRW_OPCODE_MOV; 521 inst->src[1] = src_reg(); 522 progress = true; 523 } 524 break; 525 526 case BRW_OPCODE_MUL: 527 if (inst->src[1].is_zero()) { 528 inst->opcode = BRW_OPCODE_MOV; 529 switch (inst->src[0].type) { 530 case BRW_REGISTER_TYPE_F: 531 inst->src[0] = src_reg(0.0f); 532 break; 533 case BRW_REGISTER_TYPE_D: 534 inst->src[0] = src_reg(0); 535 break; 536 case BRW_REGISTER_TYPE_UD: 537 inst->src[0] = src_reg(0u); 538 break; 539 default: 540 assert(!"not reached"); 541 inst->src[0] = src_reg(0.0f); 542 break; 543 } 544 inst->src[1] = src_reg(); 545 progress = true; 546 } else if (inst->src[1].is_one()) { 547 inst->opcode = BRW_OPCODE_MOV; 548 inst->src[1] = src_reg(); 549 progress = true; 550 } 551 break; 552 default: 553 break; 554 } 555 } 556 557 if (progress) 558 this->live_intervals_valid = false; 559 560 return progress; 561} 562 563/** 564 * Only a limited number of hardware registers may be used for push 565 * constants, so this turns access to the overflowed constants into 566 * pull constants. 567 */ 568void 569vec4_visitor::move_push_constants_to_pull_constants() 570{ 571 int pull_constant_loc[this->uniforms]; 572 573 /* Only allow 32 registers (256 uniform components) as push constants, 574 * which is the limit on gen6. 575 */ 576 int max_uniform_components = 32 * 8; 577 if (this->uniforms * 4 <= max_uniform_components) 578 return; 579 580 /* Make some sort of choice as to which uniforms get sent to pull 581 * constants. We could potentially do something clever here like 582 * look for the most infrequently used uniform vec4s, but leave 583 * that for later. 
584 */ 585 for (int i = 0; i < this->uniforms * 4; i += 4) { 586 pull_constant_loc[i / 4] = -1; 587 588 if (i >= max_uniform_components) { 589 const float **values = &prog_data->param[i]; 590 591 /* Try to find an existing copy of this uniform in the pull 592 * constants if it was part of an array access already. 593 */ 594 for (unsigned int j = 0; j < prog_data->nr_pull_params; j += 4) { 595 int matches; 596 597 for (matches = 0; matches < 4; matches++) { 598 if (prog_data->pull_param[j + matches] != values[matches]) 599 break; 600 } 601 602 if (matches == 4) { 603 pull_constant_loc[i / 4] = j / 4; 604 break; 605 } 606 } 607 608 if (pull_constant_loc[i / 4] == -1) { 609 assert(prog_data->nr_pull_params % 4 == 0); 610 pull_constant_loc[i / 4] = prog_data->nr_pull_params / 4; 611 612 for (int j = 0; j < 4; j++) { 613 prog_data->pull_param[prog_data->nr_pull_params++] = values[j]; 614 } 615 } 616 } 617 } 618 619 /* Now actually rewrite usage of the things we've moved to pull 620 * constants. 621 */ 622 foreach_list_safe(node, &this->instructions) { 623 vec4_instruction *inst = (vec4_instruction *)node; 624 625 for (int i = 0 ; i < 3; i++) { 626 if (inst->src[i].file != UNIFORM || 627 pull_constant_loc[inst->src[i].reg] == -1) 628 continue; 629 630 int uniform = inst->src[i].reg; 631 632 dst_reg temp = dst_reg(this, glsl_type::vec4_type); 633 634 emit_pull_constant_load(inst, temp, inst->src[i], 635 pull_constant_loc[uniform]); 636 637 inst->src[i].file = temp.file; 638 inst->src[i].reg = temp.reg; 639 inst->src[i].reg_offset = temp.reg_offset; 640 inst->src[i].reladdr = NULL; 641 } 642 } 643 644 /* Repack push constants to remove the now-unused ones. */ 645 pack_uniform_registers(); 646} 647 648/* 649 * Tries to reduce extra MOV instructions by taking GRFs that get just 650 * written and then MOVed into an MRF and making the original write of 651 * the GRF write directly to the MRF instead. 
 */
bool
vec4_visitor::opt_compute_to_mrf()
{
   bool progress = false;
   int next_ip = 0;

   calculate_live_intervals();

   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      int ip = next_ip;
      next_ip++;

      /* Only a plain, unpredicated, unmodified GRF-to-MRF MOV of
       * matching type is a candidate for elimination.
       */
      if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicate ||
          inst->dst.file != MRF || inst->src[0].file != GRF ||
          inst->dst.type != inst->src[0].type ||
          inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
         continue;

      int mrf = inst->dst.reg;

      /* Can't compute-to-MRF this GRF if someone else was going to
       * read it later.
       */
      if (this->virtual_grf_use[inst->src[0].reg] > ip)
         continue;

      /* We need to check interference with the MRF between this
       * instruction and the earliest instruction involved in writing
       * the GRF we're eliminating.  To do that, keep track of which
       * of our source channels we've seen initialized.
       */
      bool chans_needed[4] = {false, false, false, false};
      int chans_remaining = 0;
      for (int i = 0; i < 4; i++) {
         int chan = BRW_GET_SWZ(inst->src[0].swizzle, i);

         if (!(inst->dst.writemask & (1 << i)))
            continue;

         /* We don't handle compute-to-MRF across a swizzle.  We would
          * need to be able to rewrite instructions above to output
          * results to different channels.
          */
         /* 5 is a sentinel: it can never be driven back down to 0 by
          * the (at most 4) unconditional channel writes found below,
          * so the chans_remaining > 4 check rejects swizzled MOVs.
          */
         if (chan != i)
            chans_remaining = 5;

         if (!chans_needed[chan]) {
            chans_needed[chan] = true;
            chans_remaining++;
         }
      }
      if (chans_remaining > 4)
         continue;

      /* Now walk up the instruction stream trying to see if we can
       * rewrite everything writing to the GRF into the MRF instead.
       */
      vec4_instruction *scan_inst;
      /* scan_inst->prev == NULL means we've hit the exec_list head
       * sentinel, i.e. the top of the instruction stream.
       */
      for (scan_inst = (vec4_instruction *)inst->prev;
           scan_inst->prev != NULL;
           scan_inst = (vec4_instruction *)scan_inst->prev) {
         if (scan_inst->dst.file == GRF &&
             scan_inst->dst.reg == inst->src[0].reg &&
             scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
            /* Found something writing to the reg we want to turn into
             * a compute-to-MRF.
             */

            /* SEND instructions can't have MRF as a destination. */
            if (scan_inst->mlen)
               break;

            if (intel->gen >= 6) {
               /* gen6 math instructions must have the destination be
                * GRF, so no compute-to-MRF for them.
                */
               if (scan_inst->is_math()) {
                  break;
               }
            }

            /* Mark which channels we found unconditional writes for. */
            if (!scan_inst->predicate) {
               for (int i = 0; i < 4; i++) {
                  if (scan_inst->dst.writemask & (1 << i) &&
                      chans_needed[i]) {
                     chans_needed[i] = false;
                     chans_remaining--;
                  }
               }
            }

            /* All source channels accounted for: the rewrite below can
             * proceed from this scan_inst.
             */
            if (chans_remaining == 0)
               break;
         }

         /* We don't handle flow control here.  Most computation of
          * values that end up in MRFs are shortly before the MRF
          * write anyway.
          */
         if (scan_inst->opcode == BRW_OPCODE_DO ||
             scan_inst->opcode == BRW_OPCODE_WHILE ||
             scan_inst->opcode == BRW_OPCODE_ELSE ||
             scan_inst->opcode == BRW_OPCODE_ENDIF) {
            break;
         }

         /* You can't read from an MRF, so if someone else reads our
          * MRF's source GRF that we wanted to rewrite, that stops us.
          */
         bool interfered = false;
         for (int i = 0; i < 3; i++) {
            if (scan_inst->src[i].file == GRF &&
                scan_inst->src[i].reg == inst->src[0].reg &&
                scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
               interfered = true;
            }
         }
         if (interfered)
            break;

         /* If somebody else writes our MRF here, we can't
          * compute-to-MRF before that.
          */
         if (scan_inst->dst.file == MRF && mrf == scan_inst->dst.reg)
            break;

         if (scan_inst->mlen > 0) {
            /* Found a SEND instruction, which means that there are
             * live values in MRFs from base_mrf to base_mrf +
             * scan_inst->mlen - 1.  Don't go pushing our MRF write up
             * above it.
             */
            if (mrf >= scan_inst->base_mrf &&
                mrf < scan_inst->base_mrf + scan_inst->mlen) {
               break;
            }
         }
      }

      if (chans_remaining == 0) {
         /* If we've made it here, we have an inst we want to
          * compute-to-MRF, and a scan_inst pointing to the earliest
          * instruction involved in computing the value.  Now go
          * rewrite the instruction stream between the two.
          */

         while (scan_inst != inst) {
            if (scan_inst->dst.file == GRF &&
                scan_inst->dst.reg == inst->src[0].reg &&
                scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
               /* Retarget the GRF write straight at the MRF, and carry
                * over the eliminated MOV's saturate flag.
                */
               scan_inst->dst.file = MRF;
               scan_inst->dst.reg = mrf;
               scan_inst->dst.reg_offset = 0;
               scan_inst->saturate |= inst->saturate;
            }
            scan_inst = (vec4_instruction *)scan_inst->next;
         }
         inst->remove();
         progress = true;
      }
   }

   if (progress)
      live_intervals_valid = false;

   return progress;
}

} /* namespace brw */