PostRASchedulerList.cpp revision 544df3651e15f31ae2c14588c1272fd870560250
//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "post-RA-sched"
#include "ExactHazardRecognizer.h"
#include "SimpleHazardRecognizer.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include <map>
#include <set>
using namespace llvm;

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");

static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies"),
                      cl::init(true), cl::Hidden);

static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
                      cl::desc("Enable exact hazard avoidance"),
                      cl::init(true), cl::Hidden);

// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
static cl::opt<int>
DebugDiv("postra-sched-debugdiv",
         cl::desc("Debug control MBBs that are scheduled"),
         cl::init(0), cl::Hidden);
static cl::opt<int>
DebugMod("postra-sched-debugmod",
         cl::desc("Debug control MBBs that are scheduled"),
         cl::init(0), cl::Hidden);

namespace {
  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(&ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Post RA top-down list latency scheduler";
    }

    bool runOnMachineFunction(MachineFunction &Fn);
  };
  char PostRAScheduler::ID = 0;

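  /// SchedulePostRATDList - A top-down list scheduler for the post-RA pass.
  /// In addition to building and scheduling the DAG for each region, it
  /// tracks register liveness bottom-up (Classes, RegRefs, KillIndices,
  /// DefIndices) so that anti-dependencies on the critical path can be
  /// broken by renaming registers.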
  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands have
    /// been issued, but their results are not ready yet (due to the latency of
    /// the operation). Once the operands become available, the instruction is
    /// added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// Topo - A topological ordering for SUnits.
    ScheduleDAGTopologicalSort Topo;

    /// AllocatableSet - The set of allocatable registers.
    /// We'll be ignoring anti-dependencies on non-allocatable registers,
    /// because they may not be safe to break.
    const BitVector AllocatableSet;

    /// HazardRec - The hazard recognizer to use.
    ScheduleHazardRecognizer *HazardRec;

    /// Classes - For live regs that are only used in one register class in a
    /// live range, the register class. If the register is not live, the
    /// corresponding value is null. If the register is live but used in
    /// multiple register classes, the corresponding value is -1 casted to a
    /// pointer.
    const TargetRegisterClass *
      Classes[TargetRegisterInfo::FirstVirtualRegister];

    /// RegRefs - Map registers to all their references within a live range.
    std::multimap<unsigned, MachineOperand *> RegRefs;

    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
    /// or ~0u if the register is not live.
    unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];

    /// DefIndices - The index of the most recent complete def (proceeding
    /// bottom up), or ~0u if the register is live.
    unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];

  public:
    SchedulePostRATDList(MachineFunction &MF,
                         const MachineLoopInfo &MLI,
                         const MachineDominatorTree &MDT,
                         ScheduleHazardRecognizer *HR)
      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
        AllocatableSet(TRI->getAllocatableSet(MF)),
        HazardRec(HR) {}

    ~SchedulePostRATDList() {
      delete HazardRec;
    }

    /// StartBlock - Initialize register live-range state for scheduling in
    /// this block.
    ///
    void StartBlock(MachineBasicBlock *BB);

    /// Schedule - Schedule the instruction range using list scheduling.
    ///
    void Schedule();

    /// FixupKills - Fix register kill flags that have been made
    /// invalid due to scheduling.
    ///
    void FixupKills(MachineBasicBlock *MBB);

    /// Observe - Update liveness information to account for the current
    /// instruction, which will not be scheduled.
    ///
    void Observe(MachineInstr *MI, unsigned Count);

    /// FinishBlock - Clean up register live-range state.
    ///
    void FinishBlock();

  private:
    void PrescanInstruction(MachineInstr *MI);
    void ScanInstruction(MachineInstr *MI, unsigned Count);
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ReleaseSuccessors(SUnit *SU);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();
    bool BreakAntiDependencies();
    unsigned findSuitableFreeRegister(unsigned AntiDepReg,
                                      unsigned LastNewReg,
                                      const TargetRegisterClass *);
    void StartBlockForKills(MachineBasicBlock *BB);

    // ToggleKillFlag - Toggle a register operand kill flag. Other
    // adjustments may be made to the instruction if necessary. Return
    // true if the operand has been deleted, false if not.
    bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
  };
}

/// isSchedulingBoundary - Test if the given instruction should be
/// considered a scheduling boundary. This primarily includes labels
/// and terminators.
///
static bool isSchedulingBoundary(const MachineInstr *MI,
                                 const MachineFunction &MF) {
  // Terminators and labels can't be scheduled around.
  if (MI->getDesc().isTerminator() || MI->isLabel())
    return true;

  // Don't attempt to schedule around any instruction that modifies
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
  if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
    return true;

  return false;
}

bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  DEBUG(errs() << "PostRAScheduler\n");

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
  ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
    (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
    (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();

  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
#ifndef NDEBUG
    // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
    if (DebugDiv > 0) {
      static int bbcnt = 0;
      if (bbcnt++ % DebugDiv != DebugMod)
        continue;
      errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
        ":MBB ID#" << MBB->getNumber() << " ***\n";
    }
#endif

    // Initialize register live-range state for scheduling in this block.
    Scheduler.StartBlock(MBB);

    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    MachineBasicBlock::iterator Current = MBB->end();
    unsigned Count = MBB->size(), CurrentCount = Count;
    for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
      MachineInstr *MI = prior(I);
      if (isSchedulingBoundary(MI, Fn)) {
        Scheduler.Run(MBB, I, Current, CurrentCount);
        Scheduler.EmitSchedule(0);
        Current = MI;
        CurrentCount = Count - 1;
        Scheduler.Observe(MI, CurrentCount);
      }
      I = MI;
      --Count;
    }
    assert(Count == 0 && "Instruction count mismatch!");
    assert((MBB->begin() == Current || CurrentCount != 0) &&
           "Instruction count mismatch!");
    Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
    Scheduler.EmitSchedule(0);

    // Clean up register live-range state.
    Scheduler.FinishBlock();

    // Update register kills.
    Scheduler.FixupKills(MBB);
  }

  return true;
}
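// Note: for each block, the loop above brackets scheduling with
// StartBlock/FinishBlock, runs the scheduler on each region between
// scheduling boundaries, calls Observe on the boundary instructions
// themselves (which are not scheduled) so the liveness state stays
// current, and finally calls FixupKills to repair kill flags after
// reordering.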
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
  // Call the superclass.
  ScheduleDAGInstrs::StartBlock(BB);

  // Reset the hazard recognizer.
  HazardRec->Reset();

  // Clear out the register class data.
  std::fill(Classes, array_endof(Classes),
            static_cast<const TargetRegisterClass *>(0));

  // Initialize the indices to indicate that no registers are live.
  std::fill(KillIndices, array_endof(KillIndices), ~0u);
  std::fill(DefIndices, array_endof(DefIndices), BB->size());

  // Determine the live-out physregs for this block.
  if (!BB->empty() && BB->back().getDesc().isReturn())
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = ~0u;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = ~0u;
      }
    }
  else
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = ~0u;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = ~0u;
        }
      }

  // Consider callee-saved registers as live-out, since we're running after
  // prologue/epilogue insertion so there's no way to add additional
  // saved registers.
  //
  // TODO: there is a new method
  // MachineFrameInfo::getPristineRegs(MBB). It gives you a list of
  // CSRs that have not been saved when entering the MBB. The
  // remaining CSRs have been saved and can be treated like call
  // clobbered registers.
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = ~0u;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = ~0u;
    }
  }
}

/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::Schedule() {
  DEBUG(errs() << "********** List Scheduling **********\n");

  // Build the scheduling graph.
  BuildSchedGraph();

  if (EnableAntiDepBreaking) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      SUnits.clear();
      EntrySU = SUnit();
      ExitSU = SUnit();
      BuildSchedGraph();
    }
  }

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}

/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
  assert(Count < InsertPosIndex && "Instruction index out of expected range!");

  // Any register which was defined within the previous scheduling region
  // may have been rescheduled and its lifetime may overlap with registers
  // in ways not reflected in our current liveness state. For each such
  // register, adjust the liveness state to be conservatively correct.
  for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
    if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
      assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
      // Mark this register to be non-renamable.
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      // Move the def index to the end of the previous region, to reflect
      // that the def could theoretically have been scheduled at the end.
      DefIndices[Reg] = InsertPosIndex;
    }

  PrescanInstruction(MI);
  ScanInstruction(MI, Count);
}

/// FinishBlock - Clean up register live-range state.
///
void SchedulePostRATDList::FinishBlock() {
  RegRefs.clear();

  // Call the superclass.
  ScheduleDAGInstrs::FinishBlock();
}

/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}
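/// PrescanInstruction - Scan MI's register operands and update the
/// Classes and RegRefs liveness state, recording each live register's
/// renaming class (or -1 if it cannot safely be renamed) and noting
/// every reference to registers still under consideration.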
void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
  // Scan the register operands for this instruction and update
  // Classes and RegRefs.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    const TargetRegisterClass *NewRC = 0;

    if (i < MI->getDesc().getNumOperands())
      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);

    // For now, only allow the register to be changed if its register
    // class is consistent across all uses.
    if (!Classes[Reg] && NewRC)
      Classes[Reg] = NewRC;
    else if (!NewRC || Classes[Reg] != NewRC)
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

    // Now check for aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      // If an alias of the reg is used during the live range, give up.
      // Note that this allows us to skip checking if AntiDepReg
      // overlaps with any of the aliases, among other things.
      unsigned AliasReg = *Alias;
      if (Classes[AliasReg]) {
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }

    // If we're still willing to consider this register, note the reference.
    if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
      RegRefs.insert(std::make_pair(Reg, &MO));
  }
}

void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
                                           unsigned Count) {
  // Update liveness.
  // Proceeding upwards, registers that are def'd but not used in this
  // instruction are now dead.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    if (!MO.isDef()) continue;
    // Ignore two-addr defs.
    if (MI->isRegTiedToUseOperand(i)) continue;

    DefIndices[Reg] = Count;
    KillIndices[Reg] = ~0u;
    assert(((KillIndices[Reg] == ~0u) !=
            (DefIndices[Reg] == ~0u)) &&
           "Kill and Def maps aren't consistent for Reg!");
    Classes[Reg] = 0;
    RegRefs.erase(Reg);
    // Repeat, for all subregs.
    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
         *Subreg; ++Subreg) {
      unsigned SubregReg = *Subreg;
      DefIndices[SubregReg] = Count;
      KillIndices[SubregReg] = ~0u;
      Classes[SubregReg] = 0;
      RegRefs.erase(SubregReg);
    }
    // Conservatively mark super-registers as unusable.
    for (const unsigned *Super = TRI->getSuperRegisters(Reg);
         *Super; ++Super) {
      unsigned SuperReg = *Super;
      Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
    }
  }
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    if (!MO.isUse()) continue;

    const TargetRegisterClass *NewRC = 0;
    if (i < MI->getDesc().getNumOperands())
      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);

    // For now, only allow the register to be changed if its register
    // class is consistent across all uses.
    if (!Classes[Reg] && NewRC)
      Classes[Reg] = NewRC;
    else if (!NewRC || Classes[Reg] != NewRC)
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

    RegRefs.insert(std::make_pair(Reg, &MO));

    // It wasn't previously live but now it is; this is a kill.
    if (KillIndices[Reg] == ~0u) {
      KillIndices[Reg] = Count;
      DefIndices[Reg] = ~0u;
      assert(((KillIndices[Reg] == ~0u) !=
              (DefIndices[Reg] == ~0u)) &&
             "Kill and Def maps aren't consistent for Reg!");
    }
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      if (KillIndices[AliasReg] == ~0u) {
        KillIndices[AliasReg] = Count;
        DefIndices[AliasReg] = ~0u;
      }
    }
  }
}
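/// findSuitableFreeRegister - Scan RC's allocation order for a register
/// that is free across AntiDepReg's live range, skipping AntiDepReg
/// itself and LastNewReg (the register most recently used to break an
/// anti-dependence on AntiDepReg). Returns the chosen register, or 0 if
/// none is suitable.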
unsigned
SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg,
                                               unsigned LastNewReg,
                                               const TargetRegisterClass *RC) {
  for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
       RE = RC->allocation_order_end(MF); R != RE; ++R) {
    unsigned NewReg = *R;
    // Don't replace a register with itself.
    if (NewReg == AntiDepReg) continue;
    // Don't replace a register with one that was recently used to repair
    // an anti-dependence with this AntiDepReg, because that would
    // re-introduce that anti-dependence.
    if (NewReg == LastNewReg) continue;
    // If NewReg is dead and NewReg's most recent def is not before
    // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
    assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
           "Kill and Def maps aren't consistent for AntiDepReg!");
    assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
           "Kill and Def maps aren't consistent for NewReg!");
    if (KillIndices[NewReg] != ~0u ||
        Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
        KillIndices[AntiDepReg] > DefIndices[NewReg])
      continue;
    return NewReg;
  }

  // No registers are free and available!
  return 0;
}

/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (SUnits.empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

  DEBUG(errs() << "Critical path has total latency "
        << (Max->getDepth() + Max->Latency) << "\n");

  // Track progress along the critical path through the SUnit graph as we walk
  // the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free. This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break. To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = InsertPosIndex - 1;
  for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
       I != E; --Count) {
    MachineInstr *MI = --I;

    // After regalloc, KILL instructions aren't safe to treat as
    // dependence-breaking. In the case of an INSERT_SUBREG, the KILL
    // is left behind appearing to clobber the super-register, while the
    // subregister needs to remain live. So we just ignore them.
    if (MI->getOpcode() == TargetInstrInfo::KILL)
      continue;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          // Don't break anti-dependencies on non-allocatable registers.
          if (!AllocatableSet.test(AntiDepReg))
            AntiDepReg = 0;
          else {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    PrescanInstruction(MI);

    // If this instruction has a use of AntiDepReg, breaking it
    // is invalid.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (MO.isUse() && AntiDepReg == Reg) {
        AntiDepReg = 0;
        break;
      }
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ?
      Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
                                                     LastNewReg[AntiDepReg],
                                                     RC)) {
        DEBUG(errs() << "Breaking anti-dependence edge on "
              << TRI->getName(AntiDepReg)
              << " with " << RegRefs.count(AntiDepReg) << " references"
              << " using " << TRI->getName(NewReg) << "!\n");

        // Update the references to the old register to refer to the new
        // register.
        std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                  std::multimap<unsigned, MachineOperand *>::iterator>
           Range = RegRefs.equal_range(AntiDepReg);
        for (std::multimap<unsigned, MachineOperand *>::iterator
             Q = Range.first, QE = Range.second; Q != QE; ++Q)
          Q->second->setReg(NewReg);

        // We just went back in time and modified history; the
        // liveness information for the anti-dependence reg is now
        // inconsistent. Set the state as if it were dead.
        Classes[NewReg] = Classes[AntiDepReg];
        DefIndices[NewReg] = DefIndices[AntiDepReg];
        KillIndices[NewReg] = KillIndices[AntiDepReg];
        assert(((KillIndices[NewReg] == ~0u) !=
                (DefIndices[NewReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for NewReg!");

        Classes[AntiDepReg] = 0;
        DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
        KillIndices[AntiDepReg] = ~0u;
        assert(((KillIndices[AntiDepReg] == ~0u) !=
                (DefIndices[AntiDepReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");

        RegRefs.erase(AntiDepReg);
        Changed = true;
        LastNewReg[AntiDepReg] = NewReg;
      }
    }

    ScanInstruction(MI, Count);
  }

  return Changed;
}

/// StartBlockForKills - Initialize register live-range state for updating
/// kills.
///
void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
  // Initialize the indices to indicate that no registers are live.
  std::fill(KillIndices, array_endof(KillIndices), ~0u);

  // Determine the live-out physregs for this block.
  if (!BB->empty() && BB->back().getDesc().isReturn()) {
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      KillIndices[Reg] = BB->size();
      // Repeat, for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        KillIndices[*Subreg] = BB->size();
      }
    }
  }
  else {
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI) {
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        KillIndices[Reg] = BB->size();
        // Repeat, for all subregs.
        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
             *Subreg; ++Subreg) {
          KillIndices[*Subreg] = BB->size();
        }
      }
    }
  }
}
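// ToggleKillFlag returns true if it deleted the operand; as written it
// only sets or clears kill flags and adds implicit subregister defs, so
// all paths currently return false.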
bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
                                          MachineOperand &MO) {
  // Setting kill flag...
  if (!MO.isKill()) {
    MO.setIsKill(true);
    return false;
  }

  // If MO itself is live, clear the kill flag...
  if (KillIndices[MO.getReg()] != ~0u) {
    MO.setIsKill(false);
    return false;
  }

  // If any subreg of MO is live, then create an imp-def for that
  // subreg and keep MO marked as killed.
  bool AllDead = true;
  const unsigned SuperReg = MO.getReg();
  for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
       *Subreg; ++Subreg) {
    if (KillIndices[*Subreg] != ~0u) {
      MI->addOperand(MachineOperand::CreateReg(*Subreg,
                                               true  /*IsDef*/,
                                               true  /*IsImp*/,
                                               false /*IsKill*/,
                                               false /*IsDead*/));
      AllDead = false;
    }
  }

  MO.setIsKill(AllDead);
  return false;
}

/// FixupKills - Fix the register kill flags; they may have been made
/// incorrect by instruction reordering.
///
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
  DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n');

  std::set<unsigned> killedRegs;
  BitVector ReservedRegs = TRI->getReservedRegs(MF);

  StartBlockForKills(MBB);

  // Examine block from end to start...
  unsigned Count = MBB->size();
  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
       I != E; --Count) {
    MachineInstr *MI = --I;

    // Update liveness. Registers that are def'd but not used in this
    // instruction are now dead. Mark the register and all subregs,
    // since they are completely defined.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegTiedToUseOperand(i)) continue;

      KillIndices[Reg] = ~0u;

      // Repeat for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        KillIndices[*Subreg] = ~0u;
      }
    }

    // Examine all used registers and set/clear kill flag. When a
    // register is used multiple times we only set the kill flag on
    // the first use.
    killedRegs.clear();
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isUse()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;

      bool kill = false;
      if (killedRegs.find(Reg) == killedRegs.end()) {
        kill = true;
        // A register is not killed if any subregs are live...
        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
             *Subreg; ++Subreg) {
          if (KillIndices[*Subreg] != ~0u) {
            kill = false;
            break;
          }
        }

        // If no subreg is live, then the register is killed if it became
        // live in this instruction.
        if (kill)
          kill = (KillIndices[Reg] == ~0u);
      }

      if (MO.isKill() != kill) {
        bool removed = ToggleKillFlag(MI, MO);
        if (removed) {
          DEBUG(errs() << "Fixed <removed> in ");
        } else {
          DEBUG(errs() << "Fixed " << MO << " in ");
        }
        DEBUG(MI->dump());
      }

      killedRegs.insert(Reg);
    }

    // Mark any used register (that is not using undef) and subregs as
    // now live...
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;

      KillIndices[Reg] = Count;

      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        KillIndices[*Subreg] = Count;
      }
    }
  }
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    errs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    errs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    PendingQueue.push_back(SuccSU);
}

/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}
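// The main loop below does three things each cycle: it moves SUnits
// whose depth has been reached from PendingQueue to AvailableQueue,
// asks the hazard recognizer whether the best available SUnit can
// issue now, and, when nothing can issue, either stalls or emits a
// noop before advancing to the next cycle.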
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // In any cycle where we can't schedule any instructions, we must
  // stall or emit a noop, depending on the target.
  bool CycleHasInsts = false;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue. If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    DEBUG(errs() << "\n*** Examining Available\n";
          LatencyPriorityQueue q = AvailableQueue;
          while (!q.empty()) {
            SUnit *su = q.pop();
            errs() << "Height " << su->getHeight() << ": ";
            su->dump(this);
          });

    SUnit *FoundSUnit = 0;

    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        FoundSUnit = CurSUnit;
        break;
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);
      CycleHasInsts = true;

      // If we are using the target-specific hazards, then don't
      // advance the cycle time just because we schedule a node. If
      // the target allows it we can schedule multiple nodes in the
      // same cycle.
      if (!EnablePostRAHazardAvoidance) {
        if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
          ++CurCycle;
      }
    } else {
      if (CycleHasInsts) {
        DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
      } else if (!HasNoopHazards) {
        // Otherwise, we have a pipeline stall, but no other problem,
        // just advance the current cycle and try again.
        DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
        ++NumStalls;
      } else {
        // Otherwise, we have no instructions to issue and we have instructions
        // that will fault if we don't do this right. This is the case for
        // processors without pipeline interlocks and other cases.
        DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
        HazardRec->EmitNoop();
        Sequence.push_back(0);   // NULL here means noop
        ++NumNoops;
      }

      ++CurCycle;
      CycleHasInsts = false;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createPostRAScheduler() {
  return new PostRAScheduler();
}