PostRASchedulerList.cpp revision 0eeb45137a84aaa4dd22567dddce6bb07f487941
//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "post-RA-sched"
#include "ExactHazardRecognizer.h"
#include "SimpleHazardRecognizer.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"  // for cl::opt below
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"        // for prior() and array_endof()
#include <map>
using namespace llvm;

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");

static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies"),
                      cl::init(true), cl::Hidden);

static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
                            cl::desc("Enable exact hazard avoidance"),
                            cl::init(false), cl::Hidden);

namespace {
  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(&ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Post RA top-down list latency scheduler";
    }

    bool runOnMachineFunction(MachineFunction &Fn);
  };
  char PostRAScheduler::ID = 0;

  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands
    /// have been issued, but their results are not ready yet (due to the
    /// latency of the operation). Once the operands become available, the
    /// instruction is added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// Topo - A topological ordering for SUnits.
    ScheduleDAGTopologicalSort Topo;

    /// AllocatableSet - The set of allocatable registers.
    /// We'll be ignoring anti-dependencies on non-allocatable registers,
    /// because they may not be safe to break.
    const BitVector AllocatableSet;

    /// HazardRec - The hazard recognizer to use.
    ScheduleHazardRecognizer *HazardRec;

    /// Classes - For live regs that are only used in one register class in a
    /// live range, the register class. If the register is not live, the
    /// corresponding value is null. If the register is live but used in
    /// multiple register classes, the corresponding value is -1 casted to a
    /// pointer.
    const TargetRegisterClass *
      Classes[TargetRegisterInfo::FirstVirtualRegister];

    /// RegRefs - Map registers to all their references within a live range.
    std::multimap<unsigned, MachineOperand *> RegRefs;

    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
    /// or ~0u if the register is not live.
    unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];

    /// DefIndices - The index of the most recent complete def (proceeding
    /// bottom up), or ~0u if the register is live.
    unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];

  public:
    SchedulePostRATDList(MachineFunction &MF,
                         const MachineLoopInfo &MLI,
                         const MachineDominatorTree &MDT,
                         ScheduleHazardRecognizer *HR)
      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
        AllocatableSet(TRI->getAllocatableSet(MF)),
        HazardRec(HR) {}

    ~SchedulePostRATDList() {
      delete HazardRec;
    }

    /// StartBlock - Initialize register live-range state for scheduling in
    /// this block.
    ///
    void StartBlock(MachineBasicBlock *BB);

    /// Schedule - Schedule the instruction range using list scheduling.
    ///
    void Schedule();

    /// Observe - Update liveness information to account for the current
    /// instruction, which will not be scheduled.
    ///
    void Observe(MachineInstr *MI, unsigned Count);

    /// FinishBlock - Clean up register live-range state.
    ///
    void FinishBlock();

  private:
    void PrescanInstruction(MachineInstr *MI);
    void ScanInstruction(MachineInstr *MI, unsigned Count);
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ReleaseSuccessors(SUnit *SU);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();
    bool BreakAntiDependencies();
  };
}

/// isSchedulingBoundary - Test if the given instruction should be
/// considered a scheduling boundary. This primarily includes labels
/// and terminators.
///
static bool isSchedulingBoundary(const MachineInstr *MI,
                                 const MachineFunction &MF) {
  // Terminators and labels can't be scheduled around.
  if (MI->getDesc().isTerminator() || MI->isLabel())
    return true;

  // Don't attempt to schedule around any instruction that modifies
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
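  // (Illustration: on x86, for example, a push or pop adjusts the stack
  // pointer, and without this cutoff every later stack-slot reference would
  // need a dependence edge on it.)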
  const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
  if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
    return true;

  return false;
}

bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  DEBUG(errs() << "PostRAScheduler\n");

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
  ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
    (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
    (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();

  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
    // Initialize register live-range state for scheduling in this block.
    Scheduler.StartBlock(MBB);

    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    MachineBasicBlock::iterator Current = MBB->end();
    unsigned Count = MBB->size(), CurrentCount = Count;
    for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
      MachineInstr *MI = prior(I);
      if (isSchedulingBoundary(MI, Fn)) {
        Scheduler.Run(MBB, I, Current, CurrentCount);
        Scheduler.EmitSchedule();
        Current = MI;
        CurrentCount = Count - 1;
        Scheduler.Observe(MI, CurrentCount);
      }
      I = MI;
      --Count;
    }
    assert(Count == 0 && "Instruction count mismatch!");
    assert((MBB->begin() == Current || CurrentCount != 0) &&
           "Instruction count mismatch!");
    Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
    Scheduler.EmitSchedule();

    // Clean up register live-range state.
    Scheduler.FinishBlock();
  }

  return true;
}

/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
  // Call the superclass.
  ScheduleDAGInstrs::StartBlock(BB);

  // Reset the hazard recognizer.
  HazardRec->Reset();

  // Clear out the register class data.
  std::fill(Classes, array_endof(Classes),
            static_cast<const TargetRegisterClass *>(0));

  // Initialize the indices to indicate that no registers are live.
  std::fill(KillIndices, array_endof(KillIndices), ~0u);
  std::fill(DefIndices, array_endof(DefIndices), BB->size());

  // Determine the live-out physregs for this block.
  if (!BB->empty() && BB->back().getDesc().isReturn())
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = ~0u;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = ~0u;
      }
    }
  else
    // In a non-return block, examine the live-in regs of all successors.
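    // (Aliases are marked live as well: a def of any overlapping sub- or
    // super-register would clobber the live value, so the whole alias set
    // must be treated as unavailable for renaming.)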
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = ~0u;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = ~0u;
        }
      }

  // Consider callee-saved registers as live-out, since we're running after
  // prologue/epilogue insertion so there's no way to add additional
  // saved registers.
  //
  // TODO: If the callee saves and restores these, then we can potentially
  // use them between the save and the restore. To do that, we could scan
  // the exit blocks to see which of these registers are defined.
  // Alternatively, callee-saved registers that aren't saved and restored
  // could be marked live-in in every block.
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = ~0u;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = ~0u;
    }
  }
}

/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::Schedule() {
  DEBUG(errs() << "********** List Scheduling **********\n");

  // Build the scheduling graph.
  BuildSchedGraph();

  if (EnableAntiDepBreaking) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      SUnits.clear();
      EntrySU = SUnit();
      ExitSU = SUnit();
      BuildSchedGraph();
    }
  }

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}

/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
  assert(Count < InsertPosIndex && "Instruction index out of expected range!");

  // Any register which was defined within the previous scheduling region
  // may have been rescheduled and its lifetime may overlap with registers
  // in ways not reflected in our current liveness state. For each such
  // register, adjust the liveness state to be conservatively correct.
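  // (Illustration: if a register was def'd at index 5 inside the region just
  // scheduled, that def may have moved anywhere within the region, so the
  // only safe summary is "def'd somewhere up to the region's end" -- which
  // is what the loop below records by moving DefIndices[Reg] to
  // InsertPosIndex and marking the register non-renamable.)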
  for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
    if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
      assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
      // Mark this register to be non-renamable.
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      // Move the def index to the end of the previous region, to reflect
      // that the def could theoretically have been scheduled at the end.
      DefIndices[Reg] = InsertPosIndex;
    }

  PrescanInstruction(MI);
  ScanInstruction(MI, Count);
}

/// FinishBlock - Clean up register live-range state.
///
void SchedulePostRATDList::FinishBlock() {
  RegRefs.clear();

  // Call the superclass.
  ScheduleDAGInstrs::FinishBlock();
}

/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}

void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
  // Scan the register operands for this instruction and update
  // Classes and RegRefs.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    const TargetRegisterClass *NewRC = 0;

    if (i < MI->getDesc().getNumOperands())
      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);

    // For now, only allow the register to be changed if its register
    // class is consistent across all uses.
    if (!Classes[Reg] && NewRC)
      Classes[Reg] = NewRC;
    else if (!NewRC || Classes[Reg] != NewRC)
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

    // Now check for aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      // If an alias of the reg is used during the live range, give up.
      // Note that this allows us to skip checking if AntiDepReg
      // overlaps with any of the aliases, among other things.
      unsigned AliasReg = *Alias;
      if (Classes[AliasReg]) {
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }

    // If we're still willing to consider this register, note the reference.
    if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
      RegRefs.insert(std::make_pair(Reg, &MO));
  }
}

void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
                                           unsigned Count) {
  // Update liveness.
  // Proceeding upwards, registers that are def'd but not used in this
  // instruction are now dead.
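  // (Liveness is encoded by the invariant that exactly one of KillIndices[R]
  // and DefIndices[R] is ~0u: a live register has a valid kill index and
  // DefIndices[R] == ~0u, while a dead one has a valid def index and
  // KillIndices[R] == ~0u. The asserts below check this after each update.)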
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    if (!MO.isDef()) continue;
    // Ignore two-addr defs.
    if (MI->isRegTiedToUseOperand(i)) continue;

    DefIndices[Reg] = Count;
    KillIndices[Reg] = ~0u;
    assert(((KillIndices[Reg] == ~0u) !=
            (DefIndices[Reg] == ~0u)) &&
           "Kill and Def maps aren't consistent for Reg!");
    Classes[Reg] = 0;
    RegRefs.erase(Reg);
    // Repeat, for all subregs.
    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
         *Subreg; ++Subreg) {
      unsigned SubregReg = *Subreg;
      DefIndices[SubregReg] = Count;
      KillIndices[SubregReg] = ~0u;
      Classes[SubregReg] = 0;
      RegRefs.erase(SubregReg);
    }
    // Conservatively mark super-registers as unusable.
    for (const unsigned *Super = TRI->getSuperRegisters(Reg);
         *Super; ++Super) {
      unsigned SuperReg = *Super;
      Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
    }
  }
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    if (!MO.isUse()) continue;

    const TargetRegisterClass *NewRC = 0;
    if (i < MI->getDesc().getNumOperands())
      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);

    // For now, only allow the register to be changed if its register
    // class is consistent across all uses.
    if (!Classes[Reg] && NewRC)
      Classes[Reg] = NewRC;
    else if (!NewRC || Classes[Reg] != NewRC)
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

    RegRefs.insert(std::make_pair(Reg, &MO));

    // It wasn't previously live but now it is, this is a kill.
    if (KillIndices[Reg] == ~0u) {
      KillIndices[Reg] = Count;
      DefIndices[Reg] = ~0u;
      assert(((KillIndices[Reg] == ~0u) !=
              (DefIndices[Reg] == ~0u)) &&
             "Kill and Def maps aren't consistent for Reg!");
    }
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      if (KillIndices[AliasReg] == ~0u) {
        KillIndices[AliasReg] = Count;
        DefIndices[AliasReg] = ~0u;
      }
    }
  }
}

/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (SUnits.empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

  DEBUG(errs() << "Critical path has total latency "
        << (Max->getDepth() + Max->Latency) << "\n");

  // Track progress along the critical path through the SUnit graph as we walk
  // the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free. This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break. To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = InsertPosIndex - 1;
  for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
       I != E; --Count) {
    MachineInstr *MI = --I;

    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
    // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
    // is left behind appearing to clobber the super-register, while the
    // subregister needs to remain live. So we just ignore them.
    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
      continue;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          // Don't break anti-dependencies on non-allocatable registers.
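          // (Reserved registers such as the stack pointer are never in the
          // allocatable set, so, for example, an anti-dependence on the
          // stack pointer is left alone here.)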
          if (!AllocatableSet.test(AntiDepReg))
            AntiDepReg = 0;
          else {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    PrescanInstruction(MI);

    // If this instruction has a use of AntiDepReg, breaking it
    // is invalid.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (MO.isUse() && AntiDepReg == Reg) {
        AntiDepReg = 0;
        break;
      }
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
           RE = RC->allocation_order_end(MF); R != RE; ++R) {
        unsigned NewReg = *R;
        // Don't replace a register with itself.
        if (NewReg == AntiDepReg) continue;
        // Don't replace a register with one that was recently used to repair
        // an anti-dependence with this AntiDepReg, because that would
        // re-introduce that anti-dependence.
        if (NewReg == LastNewReg[AntiDepReg]) continue;
        // If NewReg is dead and NewReg's most recent def is not before
        // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
        assert(((KillIndices[AntiDepReg] == ~0u) !=
                (DefIndices[AntiDepReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");
        assert(((KillIndices[NewReg] == ~0u) !=
                (DefIndices[NewReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for NewReg!");
        if (KillIndices[NewReg] == ~0u &&
            Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
            KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
          DEBUG(errs() << "Breaking anti-dependence edge on "
                << TRI->getName(AntiDepReg)
                << " with " << RegRefs.count(AntiDepReg) << " references"
                << " using " << TRI->getName(NewReg) << "!\n");

          // Update the references to the old register to refer to the new
          // register.
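          // (RegRefs was populated by PrescanInstruction with every reference
          // in the current live range, so rewriting this equal_range updates
          // all of them at once.)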
          std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                    std::multimap<unsigned, MachineOperand *>::iterator>
            Range = RegRefs.equal_range(AntiDepReg);
          for (std::multimap<unsigned, MachineOperand *>::iterator
               Q = Range.first, QE = Range.second; Q != QE; ++Q)
            Q->second->setReg(NewReg);

          // We just went back in time and modified history; the
          // liveness information for the anti-dependence reg is now
          // inconsistent. Set the state as if it were dead.
          Classes[NewReg] = Classes[AntiDepReg];
          DefIndices[NewReg] = DefIndices[AntiDepReg];
          KillIndices[NewReg] = KillIndices[AntiDepReg];
          assert(((KillIndices[NewReg] == ~0u) !=
                  (DefIndices[NewReg] == ~0u)) &&
                 "Kill and Def maps aren't consistent for NewReg!");

          Classes[AntiDepReg] = 0;
          DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
          KillIndices[AntiDepReg] = ~0u;
          assert(((KillIndices[AntiDepReg] == ~0u) !=
                  (DefIndices[AntiDepReg] == ~0u)) &&
                 "Kill and Def maps aren't consistent for AntiDepReg!");

          RegRefs.erase(AntiDepReg);
          Changed = true;
          LastNewReg[AntiDepReg] = NewReg;
          break;
        }
      }
    }

    ScanInstruction(MI, Count);
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    errs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    errs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    PendingQueue.push_back(SuccSU);
}

/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue. If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    SUnit *FoundSUnit = 0;

    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        FoundSUnit = CurSUnit;
        break;
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);

      // If we are using the target-specific hazards, then don't
      // advance the cycle time just because we schedule a node. If
      // the target allows it we can schedule multiple nodes in the
      // same cycle.
      if (!EnablePostRAHazardAvoidance) {
        if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
          ++CurCycle;
      }
    } else if (!HasNoopHazards) {
      // Otherwise, we have a pipeline stall, but no other problem, just
      // advance the current cycle and try again.
      DEBUG(errs() << "*** Advancing cycle, no work to do\n");
      HazardRec->AdvanceCycle();
      ++NumStalls;
      ++CurCycle;
    } else {
      // Otherwise, we have no instructions to issue and we have instructions
      // that will fault if we don't do this right. This is the case for
      // processors without pipeline interlocks and other cases.
      DEBUG(errs() << "*** Emitting noop\n");
      HazardRec->EmitNoop();
      Sequence.push_back(0);   // NULL here means noop
      ++NumNoops;
      ++CurCycle;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}

//===----------------------------------------------------------------------===//
//  Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createPostRAScheduler() {
  return new PostRAScheduler();
}
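
// Usage sketch (an illustration, not code from this revision): targets obtain
// this pass from the common code-gen pipeline via the factory above, roughly
// along the lines of:
//
//   if (OptLevel != CodeGenOpt::None)
//     PM.add(createPostRAScheduler());
//
// The exact guard and call site live in LLVMTargetMachine.cpp and may differ
// from this sketch.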