PostRASchedulerList.cpp revision bed353d0163a6b17beecc20c23b67de9b06e7b5c
1//===----- SchedulePostRAList.cpp - list scheduler ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements a top-down list scheduler, using standard algorithms. 11// The basic approach uses a priority queue of available nodes to schedule. 12// One at a time, nodes are taken from the priority queue (thus in priority 13// order), checked for legality to schedule, and emitted if legal. 14// 15// Nodes may not be legal to schedule either due to structural hazards (e.g. 16// pipeline or resource constraints) or because an input to the instruction has 17// not completed execution. 18// 19//===----------------------------------------------------------------------===// 20 21#define DEBUG_TYPE "post-RA-sched" 22#include "ScheduleDAGInstrs.h" 23#include "llvm/CodeGen/Passes.h" 24#include "llvm/CodeGen/LatencyPriorityQueue.h" 25#include "llvm/CodeGen/SchedulerRegistry.h" 26#include "llvm/CodeGen/MachineDominators.h" 27#include "llvm/CodeGen/MachineFunctionPass.h" 28#include "llvm/CodeGen/MachineLoopInfo.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/CodeGen/ScheduleHazardRecognizer.h" 31#include "llvm/Target/TargetLowering.h" 32#include "llvm/Target/TargetMachine.h" 33#include "llvm/Target/TargetInstrInfo.h" 34#include "llvm/Target/TargetRegisterInfo.h" 35#include "llvm/Support/Compiler.h" 36#include "llvm/Support/Debug.h" 37#include "llvm/ADT/Statistic.h" 38#include <map> 39using namespace llvm; 40 41STATISTIC(NumNoops, "Number of noops inserted"); 42STATISTIC(NumStalls, "Number of pipeline stalls"); 43 44static cl::opt<bool> 45EnableAntiDepBreaking("break-anti-dependencies", 46 cl::desc("Break post-RA scheduling anti-dependencies"), 47 cl::init(true), cl::Hidden); 48 49static cl::opt<bool> 50EnablePostRAHazardAvoidance("avoid-hazards", 51 cl::desc("Enable simple hazard-avoidance"), 52 cl::init(true), cl::Hidden); 53 54namespace { 55 class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass { 56 public: 57 static char ID; 58 PostRAScheduler() : MachineFunctionPass(&ID) {} 59 60 void getAnalysisUsage(AnalysisUsage &AU) const { 61 AU.addRequired<MachineDominatorTree>(); 62 AU.addPreserved<MachineDominatorTree>(); 63 AU.addRequired<MachineLoopInfo>(); 64 AU.addPreserved<MachineLoopInfo>(); 65 MachineFunctionPass::getAnalysisUsage(AU); 66 } 67 68 const char *getPassName() const { 69 return "Post RA top-down list latency scheduler"; 70 } 71 72 bool runOnMachineFunction(MachineFunction &Fn); 73 }; 74 char PostRAScheduler::ID = 0; 75 76 class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs { 77 /// AvailableQueue - The priority queue to use for the available SUnits. 78 /// 79 LatencyPriorityQueue AvailableQueue; 80 81 /// PendingQueue - This contains all of the instructions whose operands have 82 /// been issued, but their results are not ready yet (due to the latency of 83 /// the operation). Once the operands becomes available, the instruction is 84 /// added to the AvailableQueue. 85 std::vector<SUnit*> PendingQueue; 86 87 /// Topo - A topological ordering for SUnits. 88 ScheduleDAGTopologicalSort Topo; 89 90 /// AllocatableSet - The set of allocatable registers. 91 /// We'll be ignoring anti-dependencies on non-allocatable registers, 92 /// because they may not be safe to break. 93 const BitVector AllocatableSet; 94 95 /// HazardRec - The hazard recognizer to use. 96 ScheduleHazardRecognizer *HazardRec; 97 98 /// Classes - For live regs that are only used in one register class in a 99 /// live range, the register class. If the register is not live, the 100 /// corresponding value is null. If the register is live but used in 101 /// multiple register classes, the corresponding value is -1 casted to a 102 /// pointer. 103 const TargetRegisterClass * 104 Classes[TargetRegisterInfo::FirstVirtualRegister]; 105 106 /// RegRegs - Map registers to all their references within a live range. 107 std::multimap<unsigned, MachineOperand *> RegRefs; 108 109 /// The index of the most recent kill (proceding bottom-up), or ~0u if 110 /// the register is not live. 111 unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; 112 113 /// The index of the most recent complete def (proceding bottom up), or ~0u 114 /// if the register is live. 115 unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; 116 117 public: 118 SchedulePostRATDList(MachineFunction &MF, 119 const MachineLoopInfo &MLI, 120 const MachineDominatorTree &MDT, 121 ScheduleHazardRecognizer *HR) 122 : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), 123 AllocatableSet(TRI->getAllocatableSet(MF)), 124 HazardRec(HR) {} 125 126 ~SchedulePostRATDList() { 127 delete HazardRec; 128 } 129 130 /// StartBlock - Initialize register live-range state for scheduling in 131 /// this block. 132 /// 133 void StartBlock(MachineBasicBlock *BB); 134 135 /// Schedule - Schedule the instruction range using list scheduling. 136 /// 137 void Schedule(); 138 139 /// Observe - Update liveness information to account for the current 140 /// instruction, which will not be scheduled. 141 /// 142 void Observe(MachineInstr *MI); 143 144 /// FinishBlock - Clean up register live-range state. 145 /// 146 void FinishBlock(); 147 148 private: 149 void PrescanInstruction(MachineInstr *MI); 150 void ScanInstruction(MachineInstr *MI, unsigned Count); 151 void ReleaseSucc(SUnit *SU, SDep *SuccEdge); 152 void ReleaseSuccessors(SUnit *SU); 153 void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); 154 void ListScheduleTopDown(); 155 bool BreakAntiDependencies(); 156 }; 157 158 /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses 159 /// a coarse classification and attempts to avoid that instructions of 160 /// a given class aren't grouped too densely together. 161 class SimpleHazardRecognizer : public ScheduleHazardRecognizer { 162 /// Class - A simple classification for SUnits. 163 enum Class { 164 Other, Load, Store 165 }; 166 167 /// Window - The Class values of the most recently issued 168 /// instructions. 169 Class Window[8]; 170 171 /// getClass - Classify the given SUnit. 172 Class getClass(const SUnit *SU) { 173 const MachineInstr *MI = SU->getInstr(); 174 const TargetInstrDesc &TID = MI->getDesc(); 175 if (TID.mayLoad()) 176 return Load; 177 if (TID.mayStore()) 178 return Store; 179 return Other; 180 } 181 182 /// Step - Rotate the existing entries in Window and insert the 183 /// given class value in position as the most recent. 184 void Step(Class C) { 185 std::copy(Window+1, array_endof(Window), Window); 186 Window[array_lengthof(Window)-1] = C; 187 } 188 189 public: 190 SimpleHazardRecognizer() : Window() {} 191 192 virtual HazardType getHazardType(SUnit *SU) { 193 Class C = getClass(SU); 194 if (C == Other) 195 return NoHazard; 196 unsigned Score = 0; 197 for (unsigned i = 0; i != array_lengthof(Window); ++i) 198 if (Window[i] == C) 199 Score += i + 1; 200 if (Score > array_lengthof(Window) * 2) 201 return Hazard; 202 return NoHazard; 203 } 204 205 virtual void EmitInstruction(SUnit *SU) { 206 Step(getClass(SU)); 207 } 208 209 virtual void AdvanceCycle() { 210 Step(Other); 211 } 212 }; 213} 214 215/// isSchedulingBoundary - Test if the given instruction should be 216/// considered a scheduling boundary. This primarily includes labels 217/// and terminators. 218/// 219static bool isSchedulingBoundary(const MachineInstr *MI, 220 const MachineFunction &MF) { 221 // Terminators and labels can't be scheduled around. 222 if (MI->getDesc().isTerminator() || MI->isLabel()) 223 return true; 224 225 // Don't attempt to schedule around any instruction that modifies 226 // a stack-oriented pointer, as it's unlikely to be profitable. This 227 // saves compile time, because it doesn't require every single 228 // stack slot reference to depend on the instruction that does the 229 // modification. 230 const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); 231 if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore())) 232 return true; 233 234 return false; 235} 236 237bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { 238 DOUT << "PostRAScheduler\n"; 239 240 const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); 241 const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); 242 ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ? 243 new SimpleHazardRecognizer : 244 new ScheduleHazardRecognizer(); 245 246 SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR); 247 248 // Loop over all of the basic blocks 249 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 250 MBB != MBBe; ++MBB) { 251 // Initialize register live-range state for scheduling in this block. 252 Scheduler.StartBlock(MBB); 253 254 // Schedule each sequence of instructions not interrupted by a label 255 // or anything else that effectively needs to shut down scheduling. 256 MachineBasicBlock::iterator Current = MBB->end(); 257 for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { 258 MachineInstr *MI = prior(I); 259 if (isSchedulingBoundary(MI, Fn)) { 260 if (I != Current) { 261 Scheduler.Run(0, MBB, I, Current); 262 Scheduler.EmitSchedule(); 263 } 264 Scheduler.Observe(MI); 265 Current = MI; 266 } 267 I = MI; 268 } 269 Scheduler.Run(0, MBB, MBB->begin(), Current); 270 Scheduler.EmitSchedule(); 271 272 // Clean up register live-range state. 273 Scheduler.FinishBlock(); 274 } 275 276 return true; 277} 278 279/// StartBlock - Initialize register live-range state for scheduling in 280/// this block. 281/// 282void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { 283 // Call the superclass. 284 ScheduleDAGInstrs::StartBlock(BB); 285 286 // Clear out the register class data. 287 std::fill(Classes, array_endof(Classes), 288 static_cast<const TargetRegisterClass *>(0)); 289 290 // Initialize the indices to indicate that no registers are live. 291 std::fill(KillIndices, array_endof(KillIndices), ~0u); 292 std::fill(DefIndices, array_endof(DefIndices), BB->size()); 293 294 // Determine the live-out physregs for this block. 295 if (!BB->empty() && BB->back().getDesc().isReturn()) 296 // In a return block, examine the function live-out regs. 297 for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), 298 E = MRI.liveout_end(); I != E; ++I) { 299 unsigned Reg = *I; 300 Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); 301 KillIndices[Reg] = BB->size(); 302 DefIndices[Reg] = ~0u; 303 // Repeat, for all aliases. 304 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 305 unsigned AliasReg = *Alias; 306 Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); 307 KillIndices[AliasReg] = BB->size(); 308 DefIndices[AliasReg] = ~0u; 309 } 310 } 311 else 312 // In a non-return block, examine the live-in regs of all successors. 313 for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), 314 SE = BB->succ_end(); SI != SE; ++SI) 315 for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), 316 E = (*SI)->livein_end(); I != E; ++I) { 317 unsigned Reg = *I; 318 Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); 319 KillIndices[Reg] = BB->size(); 320 DefIndices[Reg] = ~0u; 321 // Repeat, for all aliases. 322 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 323 unsigned AliasReg = *Alias; 324 Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); 325 KillIndices[AliasReg] = BB->size(); 326 DefIndices[AliasReg] = ~0u; 327 } 328 } 329 330 // Consider callee-saved registers as live-out, since we're running after 331 // prologue/epilogue insertion so there's no way to add additional 332 // saved registers. 333 // 334 // TODO: If the callee saves and restores these, then we can potentially 335 // use them between the save and the restore. To do that, we could scan 336 // the exit blocks to see which of these registers are defined. 337 // Alternatively, callee-saved registers that aren't saved and restored 338 // could be marked live-in in every block. 339 for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { 340 unsigned Reg = *I; 341 Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); 342 KillIndices[Reg] = BB->size(); 343 DefIndices[Reg] = ~0u; 344 // Repeat, for all aliases. 345 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 346 unsigned AliasReg = *Alias; 347 Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); 348 KillIndices[AliasReg] = BB->size(); 349 DefIndices[AliasReg] = ~0u; 350 } 351 } 352} 353 354/// Schedule - Schedule the instruction range using list scheduling. 355/// 356void SchedulePostRATDList::Schedule() { 357 DOUT << "********** List Scheduling **********\n"; 358 359 // Build the scheduling graph. 360 BuildSchedGraph(); 361 362 if (EnableAntiDepBreaking) { 363 if (BreakAntiDependencies()) { 364 // We made changes. Update the dependency graph. 365 // Theoretically we could update the graph in place: 366 // When a live range is changed to use a different register, remove 367 // the def's anti-dependence *and* output-dependence edges due to 368 // that register, and add new anti-dependence and output-dependence 369 // edges based on the next live range of the register. 370 SUnits.clear(); 371 EntrySU = SUnit(); 372 ExitSU = SUnit(); 373 BuildSchedGraph(); 374 } 375 } 376 377 AvailableQueue.initNodes(SUnits); 378 379 ListScheduleTopDown(); 380 381 AvailableQueue.releaseState(); 382} 383 384/// Observe - Update liveness information to account for the current 385/// instruction, which will not be scheduled. 386/// 387void SchedulePostRATDList::Observe(MachineInstr *MI) { 388 PrescanInstruction(MI); 389 ScanInstruction(MI, 0); 390} 391 392/// FinishBlock - Clean up register live-range state. 393/// 394void SchedulePostRATDList::FinishBlock() { 395 RegRefs.clear(); 396 397 // Call the superclass. 398 ScheduleDAGInstrs::FinishBlock(); 399} 400 401/// getInstrOperandRegClass - Return register class of the operand of an 402/// instruction of the specified TargetInstrDesc. 403static const TargetRegisterClass* 404getInstrOperandRegClass(const TargetRegisterInfo *TRI, 405 const TargetInstrDesc &II, unsigned Op) { 406 if (Op >= II.getNumOperands()) 407 return NULL; 408 if (II.OpInfo[Op].isLookupPtrRegClass()) 409 return TRI->getPointerRegClass(); 410 return TRI->getRegClass(II.OpInfo[Op].RegClass); 411} 412 413/// CriticalPathStep - Return the next SUnit after SU on the bottom-up 414/// critical path. 415static SDep *CriticalPathStep(SUnit *SU) { 416 SDep *Next = 0; 417 unsigned NextDepth = 0; 418 // Find the predecessor edge with the greatest depth. 419 for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); 420 P != PE; ++P) { 421 SUnit *PredSU = P->getSUnit(); 422 unsigned PredLatency = P->getLatency(); 423 unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; 424 // In the case of a latency tie, prefer an anti-dependency edge over 425 // other types of edges. 426 if (NextDepth < PredTotalLatency || 427 (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { 428 NextDepth = PredTotalLatency; 429 Next = &*P; 430 } 431 } 432 return Next; 433} 434 435void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) { 436 // Scan the register operands for this instruction and update 437 // Classes and RegRefs. 438 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 439 MachineOperand &MO = MI->getOperand(i); 440 if (!MO.isReg()) continue; 441 unsigned Reg = MO.getReg(); 442 if (Reg == 0) continue; 443 const TargetRegisterClass *NewRC = 444 getInstrOperandRegClass(TRI, MI->getDesc(), i); 445 446 // For now, only allow the register to be changed if its register 447 // class is consistent across all uses. 448 if (!Classes[Reg] && NewRC) 449 Classes[Reg] = NewRC; 450 else if (!NewRC || Classes[Reg] != NewRC) 451 Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); 452 453 // Now check for aliases. 454 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 455 // If an alias of the reg is used during the live range, give up. 456 // Note that this allows us to skip checking if AntiDepReg 457 // overlaps with any of the aliases, among other things. 458 unsigned AliasReg = *Alias; 459 if (Classes[AliasReg]) { 460 Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); 461 Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); 462 } 463 } 464 465 // If we're still willing to consider this register, note the reference. 466 if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) 467 RegRefs.insert(std::make_pair(Reg, &MO)); 468 } 469} 470 471void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, 472 unsigned Count) { 473 // Update liveness. 474 // Proceding upwards, registers that are defed but not used in this 475 // instruction are now dead. 476 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 477 MachineOperand &MO = MI->getOperand(i); 478 if (!MO.isReg()) continue; 479 unsigned Reg = MO.getReg(); 480 if (Reg == 0) continue; 481 if (!MO.isDef()) continue; 482 // Ignore two-addr defs. 483 if (MI->isRegReDefinedByTwoAddr(i)) continue; 484 485 DefIndices[Reg] = Count; 486 KillIndices[Reg] = ~0u; 487 Classes[Reg] = 0; 488 RegRefs.erase(Reg); 489 // Repeat, for all subregs. 490 for (const unsigned *Subreg = TRI->getSubRegisters(Reg); 491 *Subreg; ++Subreg) { 492 unsigned SubregReg = *Subreg; 493 DefIndices[SubregReg] = Count; 494 KillIndices[SubregReg] = ~0u; 495 Classes[SubregReg] = 0; 496 RegRefs.erase(SubregReg); 497 } 498 // Conservatively mark super-registers as unusable. 499 for (const unsigned *Super = TRI->getSuperRegisters(Reg); 500 *Super; ++Super) { 501 unsigned SuperReg = *Super; 502 Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1); 503 } 504 } 505 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 506 MachineOperand &MO = MI->getOperand(i); 507 if (!MO.isReg()) continue; 508 unsigned Reg = MO.getReg(); 509 if (Reg == 0) continue; 510 if (!MO.isUse()) continue; 511 512 const TargetRegisterClass *NewRC = 513 getInstrOperandRegClass(TRI, MI->getDesc(), i); 514 515 // For now, only allow the register to be changed if its register 516 // class is consistent across all uses. 517 if (!Classes[Reg] && NewRC) 518 Classes[Reg] = NewRC; 519 else if (!NewRC || Classes[Reg] != NewRC) 520 Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); 521 522 RegRefs.insert(std::make_pair(Reg, &MO)); 523 524 // It wasn't previously live but now it is, this is a kill. 525 if (KillIndices[Reg] == ~0u) { 526 KillIndices[Reg] = Count; 527 DefIndices[Reg] = ~0u; 528 } 529 // Repeat, for all aliases. 530 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 531 unsigned AliasReg = *Alias; 532 if (KillIndices[AliasReg] == ~0u) { 533 KillIndices[AliasReg] = Count; 534 DefIndices[AliasReg] = ~0u; 535 } 536 } 537 } 538} 539 540/// BreakAntiDependencies - Identifiy anti-dependencies along the critical path 541/// of the ScheduleDAG and break them by renaming registers. 542/// 543bool SchedulePostRATDList::BreakAntiDependencies() { 544 // The code below assumes that there is at least one instruction, 545 // so just duck out immediately if the block is empty. 546 if (SUnits.empty()) return false; 547 548 // Find the node at the bottom of the critical path. 549 SUnit *Max = 0; 550 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 551 SUnit *SU = &SUnits[i]; 552 if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) 553 Max = SU; 554 } 555 556 DOUT << "Critical path has total latency " 557 << (Max->getDepth() + Max->Latency) << "\n"; 558 559 // Track progress along the critical path through the SUnit graph as we walk 560 // the instructions. 561 SUnit *CriticalPathSU = Max; 562 MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); 563 564 // Consider this pattern: 565 // A = ... 566 // ... = A 567 // A = ... 568 // ... = A 569 // A = ... 570 // ... = A 571 // A = ... 572 // ... = A 573 // There are three anti-dependencies here, and without special care, 574 // we'd break all of them using the same register: 575 // A = ... 576 // ... = A 577 // B = ... 578 // ... = B 579 // B = ... 580 // ... = B 581 // B = ... 582 // ... = B 583 // because at each anti-dependence, B is the first register that 584 // isn't A which is free. This re-introduces anti-dependencies 585 // at all but one of the original anti-dependencies that we were 586 // trying to break. To avoid this, keep track of the most recent 587 // register that each register was replaced with, avoid avoid 588 // using it to repair an anti-dependence on the same register. 589 // This lets us produce this: 590 // A = ... 591 // ... = A 592 // B = ... 593 // ... = B 594 // C = ... 595 // ... = C 596 // B = ... 597 // ... = B 598 // This still has an anti-dependence on B, but at least it isn't on the 599 // original critical path. 600 // 601 // TODO: If we tracked more than one register here, we could potentially 602 // fix that remaining critical edge too. This is a little more involved, 603 // because unlike the most recent register, less recent registers should 604 // still be considered, though only if no other registers are available. 605 unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {}; 606 607 // Attempt to break anti-dependence edges on the critical path. Walk the 608 // instructions from the bottom up, tracking information about liveness 609 // as we go to help determine which registers are available. 610 bool Changed = false; 611 unsigned Count = SUnits.size() - 1; 612 for (MachineBasicBlock::iterator I = End, E = Begin; 613 I != E; --Count) { 614 MachineInstr *MI = --I; 615 616 // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as 617 // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF 618 // is left behind appearing to clobber the super-register, while the 619 // subregister needs to remain live. So we just ignore them. 620 if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) 621 continue; 622 623 // Check if this instruction has a dependence on the critical path that 624 // is an anti-dependence that we may be able to break. If it is, set 625 // AntiDepReg to the non-zero register associated with the anti-dependence. 626 // 627 // We limit our attention to the critical path as a heuristic to avoid 628 // breaking anti-dependence edges that aren't going to significantly 629 // impact the overall schedule. There are a limited number of registers 630 // and we want to save them for the important edges. 631 // 632 // TODO: Instructions with multiple defs could have multiple 633 // anti-dependencies. The current code here only knows how to break one 634 // edge per instruction. Note that we'd have to be able to break all of 635 // the anti-dependencies in an instruction in order to be effective. 636 unsigned AntiDepReg = 0; 637 if (MI == CriticalPathMI) { 638 if (SDep *Edge = CriticalPathStep(CriticalPathSU)) { 639 SUnit *NextSU = Edge->getSUnit(); 640 641 // Only consider anti-dependence edges. 642 if (Edge->getKind() == SDep::Anti) { 643 AntiDepReg = Edge->getReg(); 644 assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); 645 // Don't break anti-dependencies on non-allocatable registers. 646 if (!AllocatableSet.test(AntiDepReg)) 647 AntiDepReg = 0; 648 else { 649 // If the SUnit has other dependencies on the SUnit that it 650 // anti-depends on, don't bother breaking the anti-dependency 651 // since those edges would prevent such units from being 652 // scheduled past each other regardless. 653 // 654 // Also, if there are dependencies on other SUnits with the 655 // same register as the anti-dependency, don't attempt to 656 // break it. 657 for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(), 658 PE = CriticalPathSU->Preds.end(); P != PE; ++P) 659 if (P->getSUnit() == NextSU ? 660 (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : 661 (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { 662 AntiDepReg = 0; 663 break; 664 } 665 } 666 } 667 CriticalPathSU = NextSU; 668 CriticalPathMI = CriticalPathSU->getInstr(); 669 } else { 670 // We've reached the end of the critical path. 671 CriticalPathSU = 0; 672 CriticalPathMI = 0; 673 } 674 } 675 676 PrescanInstruction(MI); 677 678 // If this instruction has a use of AntiDepReg, breaking it 679 // is invalid. 680 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 681 MachineOperand &MO = MI->getOperand(i); 682 if (!MO.isReg()) continue; 683 unsigned Reg = MO.getReg(); 684 if (Reg == 0) continue; 685 if (MO.isUse() && AntiDepReg == Reg) { 686 AntiDepReg = 0; 687 break; 688 } 689 } 690 691 // Determine AntiDepReg's register class, if it is live and is 692 // consistently used within a single class. 693 const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0; 694 assert((AntiDepReg == 0 || RC != NULL) && 695 "Register should be live if it's causing an anti-dependence!"); 696 if (RC == reinterpret_cast<TargetRegisterClass *>(-1)) 697 AntiDepReg = 0; 698 699 // Look for a suitable register to use to break the anti-depenence. 700 // 701 // TODO: Instead of picking the first free register, consider which might 702 // be the best. 703 if (AntiDepReg != 0) { 704 for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), 705 RE = RC->allocation_order_end(MF); R != RE; ++R) { 706 unsigned NewReg = *R; 707 // Don't replace a register with itself. 708 if (NewReg == AntiDepReg) continue; 709 // Don't replace a register with one that was recently used to repair 710 // an anti-dependence with this AntiDepReg, because that would 711 // re-introduce that anti-dependence. 712 if (NewReg == LastNewReg[AntiDepReg]) continue; 713 // If NewReg is dead and NewReg's most recent def is not before 714 // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. 715 assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && 716 "Kill and Def maps aren't consistent for AntiDepReg!"); 717 assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && 718 "Kill and Def maps aren't consistent for NewReg!"); 719 if (KillIndices[NewReg] == ~0u && 720 Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) && 721 KillIndices[AntiDepReg] <= DefIndices[NewReg]) { 722 DOUT << "Breaking anti-dependence edge on " 723 << TRI->getName(AntiDepReg) 724 << " with " << RegRefs.count(AntiDepReg) << " references" 725 << " using " << TRI->getName(NewReg) << "!\n"; 726 727 // Update the references to the old register to refer to the new 728 // register. 729 std::pair<std::multimap<unsigned, MachineOperand *>::iterator, 730 std::multimap<unsigned, MachineOperand *>::iterator> 731 Range = RegRefs.equal_range(AntiDepReg); 732 for (std::multimap<unsigned, MachineOperand *>::iterator 733 Q = Range.first, QE = Range.second; Q != QE; ++Q) 734 Q->second->setReg(NewReg); 735 736 // We just went back in time and modified history; the 737 // liveness information for the anti-depenence reg is now 738 // inconsistent. Set the state as if it were dead. 739 Classes[NewReg] = Classes[AntiDepReg]; 740 DefIndices[NewReg] = DefIndices[AntiDepReg]; 741 KillIndices[NewReg] = KillIndices[AntiDepReg]; 742 743 Classes[AntiDepReg] = 0; 744 DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; 745 KillIndices[AntiDepReg] = ~0u; 746 747 RegRefs.erase(AntiDepReg); 748 Changed = true; 749 LastNewReg[AntiDepReg] = NewReg; 750 break; 751 } 752 } 753 } 754 755 ScanInstruction(MI, Count); 756 } 757 assert(Count == ~0u && "Count mismatch!"); 758 759 return Changed; 760} 761 762//===----------------------------------------------------------------------===// 763// Top-Down Scheduling 764//===----------------------------------------------------------------------===// 765 766/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 767/// the PendingQueue if the count reaches zero. Also update its cycle bound. 768void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { 769 SUnit *SuccSU = SuccEdge->getSUnit(); 770 --SuccSU->NumPredsLeft; 771 772#ifndef NDEBUG 773 if (SuccSU->NumPredsLeft < 0) { 774 cerr << "*** Scheduling failed! ***\n"; 775 SuccSU->dump(this); 776 cerr << " has been released too many times!\n"; 777 assert(0); 778 } 779#endif 780 781 // Compute how many cycles it will be before this actually becomes 782 // available. This is the max of the start time of all predecessors plus 783 // their latencies. 784 SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); 785 786 // If all the node's predecessors are scheduled, this node is ready 787 // to be scheduled. Ignore the special ExitSU node. 788 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) 789 PendingQueue.push_back(SuccSU); 790} 791 792/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors. 793void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { 794 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 795 I != E; ++I) 796 ReleaseSucc(SU, &*I); 797} 798 799/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 800/// count of its successors. If a successor pending count is zero, add it to 801/// the Available queue. 802void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 803 DOUT << "*** Scheduling [" << CurCycle << "]: "; 804 DEBUG(SU->dump(this)); 805 806 Sequence.push_back(SU); 807 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 808 SU->setDepthToAtLeast(CurCycle); 809 810 ReleaseSuccessors(SU); 811 SU->isScheduled = true; 812 AvailableQueue.ScheduledNode(SU); 813} 814 815/// ListScheduleTopDown - The main loop of list scheduling for top-down 816/// schedulers. 817void SchedulePostRATDList::ListScheduleTopDown() { 818 unsigned CurCycle = 0; 819 820 // Release any successors of the special Entry node. 821 ReleaseSuccessors(&EntrySU); 822 823 // All leaves to Available queue. 824 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 825 // It is available if it has no predecessors. 826 if (SUnits[i].Preds.empty()) { 827 AvailableQueue.push(&SUnits[i]); 828 SUnits[i].isAvailable = true; 829 } 830 } 831 832 // While Available queue is not empty, grab the node with the highest 833 // priority. If it is not ready put it back. Schedule the node. 834 std::vector<SUnit*> NotReady; 835 Sequence.reserve(SUnits.size()); 836 while (!AvailableQueue.empty() || !PendingQueue.empty()) { 837 // Check to see if any of the pending instructions are ready to issue. If 838 // so, add them to the available queue. 839 unsigned MinDepth = ~0u; 840 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { 841 if (PendingQueue[i]->getDepth() <= CurCycle) { 842 AvailableQueue.push(PendingQueue[i]); 843 PendingQueue[i]->isAvailable = true; 844 PendingQueue[i] = PendingQueue.back(); 845 PendingQueue.pop_back(); 846 --i; --e; 847 } else if (PendingQueue[i]->getDepth() < MinDepth) 848 MinDepth = PendingQueue[i]->getDepth(); 849 } 850 851 // If there are no instructions available, don't try to issue anything, and 852 // don't advance the hazard recognizer. 853 if (AvailableQueue.empty()) { 854 CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1; 855 continue; 856 } 857 858 SUnit *FoundSUnit = 0; 859 860 bool HasNoopHazards = false; 861 while (!AvailableQueue.empty()) { 862 SUnit *CurSUnit = AvailableQueue.pop(); 863 864 ScheduleHazardRecognizer::HazardType HT = 865 HazardRec->getHazardType(CurSUnit); 866 if (HT == ScheduleHazardRecognizer::NoHazard) { 867 FoundSUnit = CurSUnit; 868 break; 869 } 870 871 // Remember if this is a noop hazard. 872 HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; 873 874 NotReady.push_back(CurSUnit); 875 } 876 877 // Add the nodes that aren't ready back onto the available list. 878 if (!NotReady.empty()) { 879 AvailableQueue.push_all(NotReady); 880 NotReady.clear(); 881 } 882 883 // If we found a node to schedule, do it now. 884 if (FoundSUnit) { 885 ScheduleNodeTopDown(FoundSUnit, CurCycle); 886 HazardRec->EmitInstruction(FoundSUnit); 887 888 // If this is a pseudo-op node, we don't want to increment the current 889 // cycle. 890 if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! 891 ++CurCycle; 892 } else if (!HasNoopHazards) { 893 // Otherwise, we have a pipeline stall, but no other problem, just advance 894 // the current cycle and try again. 895 DOUT << "*** Advancing cycle, no work to do\n"; 896 HazardRec->AdvanceCycle(); 897 ++NumStalls; 898 ++CurCycle; 899 } else { 900 // Otherwise, we have no instructions to issue and we have instructions 901 // that will fault if we don't do this right. This is the case for 902 // processors without pipeline interlocks and other cases. 903 DOUT << "*** Emitting noop\n"; 904 HazardRec->EmitNoop(); 905 Sequence.push_back(0); // NULL here means noop 906 ++NumNoops; 907 ++CurCycle; 908 } 909 } 910 911#ifndef NDEBUG 912 VerifySchedule(/*isBottomUp=*/false); 913#endif 914} 915 916//===----------------------------------------------------------------------===// 917// Public Constructor Functions 918//===----------------------------------------------------------------------===// 919 920FunctionPass *llvm::createPostRAScheduler() { 921 return new PostRAScheduler(); 922} 923