PostRASchedulerList.cpp revision afca4630b4689e3fd3997682add43f86a6e67a5c
//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "post-RA-sched"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
#include <map>
using namespace llvm;

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");

static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies"),
                      cl::init(true), cl::Hidden);

static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
                            cl::desc("Enable simple hazard-avoidance"),
                            cl::init(true), cl::Hidden);
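// Note: both flags above default to enabled and are cl::Hidden, but they can
// still be toggled on the llc command line for experiments, e.g.
//   llc -break-anti-dependencies=false -avoid-hazards=false ...
// (illustrative invocation; the surrounding driver flags are not shown here).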
namespace {
  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(&ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Post RA top-down list latency scheduler";
    }

    bool runOnMachineFunction(MachineFunction &Fn);
  };
  char PostRAScheduler::ID = 0;

  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands
    /// have been issued, but their results are not ready yet (due to the
    /// latency of the operation). Once the operands become available, the
    /// instruction is added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// Topo - A topological ordering for SUnits.
    ScheduleDAGTopologicalSort Topo;

    /// AllocatableSet - The set of allocatable registers.
    /// We'll be ignoring anti-dependencies on non-allocatable registers,
    /// because they may not be safe to break.
    const BitVector AllocatableSet;

    /// HazardRec - The hazard recognizer to use.
    ScheduleHazardRecognizer *HazardRec;

  public:
    SchedulePostRATDList(MachineFunction &MF,
                         const MachineLoopInfo &MLI,
                         const MachineDominatorTree &MDT,
                         ScheduleHazardRecognizer *HR)
      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
        AllocatableSet(TRI->getAllocatableSet(MF)),
        HazardRec(HR) {}

    ~SchedulePostRATDList() {
      delete HazardRec;
    }

    void Schedule();

  private:
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();
    bool BreakAntiDependencies();
  };

  /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
  /// a coarse classification and attempts to avoid grouping instructions
  /// of a given class too densely together.
  class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
    /// Class - A simple classification for SUnits.
    enum Class {
      Other, Load, Store
    };

    /// Window - The Class values of the most recently issued
    /// instructions.
    Class Window[8];

    /// getClass - Classify the given SUnit.
    Class getClass(const SUnit *SU) {
      const MachineInstr *MI = SU->getInstr();
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.mayLoad())
        return Load;
      if (TID.mayStore())
        return Store;
      return Other;
    }

    /// Step - Rotate the existing entries in Window and insert the
    /// given class value in position as the most recent.
    void Step(Class C) {
      std::copy(Window+1, array_endof(Window), Window);
      Window[array_lengthof(Window)-1] = C;
    }

  public:
    SimpleHazardRecognizer() : Window() {}

    virtual HazardType getHazardType(SUnit *SU) {
      Class C = getClass(SU);
      if (C == Other)
        return NoHazard;
      unsigned Score = 0;
      for (unsigned i = 0; i != array_lengthof(Window); ++i)
        if (Window[i] == C)
          Score += i + 1;
      if (Score > array_lengthof(Window) * 2)
        return Hazard;
      return NoHazard;
    }

    virtual void EmitInstruction(SUnit *SU) {
      Step(getClass(SU));
    }

    virtual void AdvanceCycle() {
      Step(Other);
    }
  };
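  // Example of the scoring heuristic above: Window entries are weighted by
  // recency, with the most recent instruction contributing 8 and the oldest
  // contributing 1. With an 8-entry window the threshold is 16, so e.g. four
  // loads in the four most recent slots (5+6+7+8 = 26) make another load a
  // Hazard, while four loads in the four oldest slots (1+2+3+4 = 10) do not.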
}

bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  DOUT << "PostRAScheduler\n";

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
    new SimpleHazardRecognizer :
    new ScheduleHazardRecognizer();

  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    MachineBasicBlock::iterator Current = MBB->end(), Top = MBB->begin();
    for (MachineBasicBlock::iterator I = Current; I != Top; ) {
      MachineInstr *MI = --I;
      if (MI->getDesc().isTerminator() || MI->isLabel()) {
        Scheduler.Run(0, MBB, next(I), Current);
        Scheduler.EmitSchedule();
        Current = I;
      }
    }

    Scheduler.Run(0, MBB, Top, Current);
    Scheduler.EmitSchedule();
  }

  return true;
}
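// Note on the region walk above: scheduling regions are carved out bottom-up,
// ending at each label or terminator. For example (illustrative), in a block
//   %r0 = ...; ... = %r0; EH_LABEL; %r1 = ...; BR
// the instructions below the label form one region and those above it
// another, so nothing is ever reordered across the label or the branch.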
/// Schedule - Schedule the DAG using list scheduling.
void SchedulePostRATDList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  // Build the scheduling graph.
  BuildSchedGraph();

  if (EnableAntiDepBreaking) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      SUnits.clear();
      BuildSchedGraph();
    }
  }

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}

/// getInstrOperandRegClass - Return register class of the operand of an
/// instruction of the specified TargetInstrDesc.
static const TargetRegisterClass*
getInstrOperandRegClass(const TargetRegisterInfo *TRI,
                        const TargetInstrDesc &II, unsigned Op) {
  if (Op >= II.getNumOperands())
    return NULL;
  if (II.OpInfo[Op].isLookupPtrRegClass())
    return TRI->getPointerRegClass();
  return TRI->getRegClass(II.OpInfo[Op].RegClass);
}

/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}
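// For example, if SU has predecessor edges P1 (pred depth 3, latency 2) and
// P2 (pred depth 4, latency 1), both edges give a total depth of 5; on such
// a tie the anti-dependence edge, if either is one, is preferred, since anti
// edges are the ones BreakAntiDependencies can actually remove.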
/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (SUnits.empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

  DOUT << "Critical path has total latency "
       << (Max->getDepth() + Max->Latency) << "\n";

  // Track progress along the critical path through the SUnit graph as we
  // walk the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // For live regs that are used in only one register class in a live range,
  // the register class. If the register is not live, the corresponding value
  // is null. If the register is live but used in multiple register classes,
  // the corresponding value is -1 cast to a pointer.
  const TargetRegisterClass *
  Classes[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Map registers to all their references within a live range.
  std::multimap<unsigned, MachineOperand *> RegRefs;

  // The index of the most recent kill (proceeding bottom-up), or ~0u if
  // the register is not live.
  unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(KillIndices, array_endof(KillIndices), ~0u);
  // The index of the most recent complete def (proceeding bottom up), or ~0u
  // if the register is live.
  unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(DefIndices, array_endof(DefIndices), BB->size());
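  // Invariant (asserted further below): for every register, exactly one of
  // KillIndices[Reg] and DefIndices[Reg] is ~0u at any point in the scan.
  // Illustrative walk, bottom-up with decreasing Count: seeing "... = R" at
  // Count 10 while R is dead sets KillIndices[R] = 10, DefIndices[R] = ~0u
  // (R is now live); seeing "R = ..." higher up at Count 4 sets
  // DefIndices[R] = 4, KillIndices[R] = ~0u (R is dead above its def).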
  // Determine the live-out physregs for this block.
  if (BB->back().getDesc().isReturn())
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = ~0u;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = ~0u;
      }
    }
  else
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = ~0u;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = ~0u;
        }
      }

  // Consider callee-saved registers as live-out, since we're running after
  // prologue/epilogue insertion so there's no way to add additional
  // saved registers.
  //
  // TODO: If the callee saves and restores these, then we can potentially
  // use them between the save and the restore. To do that, we could scan
  // the exit blocks to see which of these registers are defined.
  // Alternatively, callee-saved registers that aren't saved and restored
  // could be marked live-in in every block.
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = ~0u;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = ~0u;
    }
  }

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free. This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break. To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = SUnits.size() - 1;
  for (MachineBasicBlock::iterator I = End, E = Begin;
       I != E; --Count) {
    MachineInstr *MI = --I;

    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
    // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
    // is left behind appearing to clobber the super-register, while the
    // subregister needs to remain live. So we just ignore them.
    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
      continue;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the
    // anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          // Don't break anti-dependencies on non-allocatable registers.
          if (!AllocatableSet.test(AntiDepReg))
            AntiDepReg = 0;
          else {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    // Scan the register operands for this instruction and update
    // Classes and RegRefs.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, MI->getDesc(), i);

      // If this instruction has a use of AntiDepReg, breaking it
      // is invalid.
      if (MO.isUse() && AntiDepReg == Reg)
        AntiDepReg = 0;

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      // Now check for aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        // If an alias of the reg is used during the live range, give up.
        // Note that this allows us to skip checking if AntiDepReg
        // overlaps with any of the aliases, among other things.
        unsigned AliasReg = *Alias;
        if (Classes[AliasReg]) {
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        }
      }

      // If we're still willing to consider this register, note the reference.
      if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
        RegRefs.insert(std::make_pair(Reg, &MO));
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
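    // Safety condition illustrated: indices grow toward the bottom of the
    // block, and the scan is currently at Count. Suppose
    // KillIndices[AntiDepReg] == 20 (the renamed live range ends at index 20
    // below us) and NewReg is currently dead with DefIndices[NewReg] == 25
    // (its next def is further down, at index 25). Then
    // KillIndices[AntiDepReg] <= DefIndices[NewReg] holds: the entire renamed
    // range fits before NewReg's next definition, so rewriting the range to
    // use NewReg cannot clobber anything.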
    if (AntiDepReg != 0) {
      for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
           RE = RC->allocation_order_end(MF); R != RE; ++R) {
        unsigned NewReg = *R;
        // Don't replace a register with itself.
        if (NewReg == AntiDepReg) continue;
        // Don't replace a register with one that was recently used to repair
        // an anti-dependence with this AntiDepReg, because that would
        // re-introduce that anti-dependence.
        if (NewReg == LastNewReg[AntiDepReg]) continue;
        // If NewReg is dead and NewReg's most recent def is not before
        // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
        assert(((KillIndices[AntiDepReg] == ~0u) !=
                (DefIndices[AntiDepReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");
        assert(((KillIndices[NewReg] == ~0u) !=
                (DefIndices[NewReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for NewReg!");
        if (KillIndices[NewReg] == ~0u &&
            Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
            KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
          DOUT << "Breaking anti-dependence edge on "
               << TRI->getName(AntiDepReg)
               << " with " << RegRefs.count(AntiDepReg) << " references"
               << " using " << TRI->getName(NewReg) << "!\n";

          // Update the references to the old register to refer to the new
          // register.
          std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                    std::multimap<unsigned, MachineOperand *>::iterator>
             Range = RegRefs.equal_range(AntiDepReg);
          for (std::multimap<unsigned, MachineOperand *>::iterator
               Q = Range.first, QE = Range.second; Q != QE; ++Q)
            Q->second->setReg(NewReg);

          // We just went back in time and modified history; the
          // liveness information for the anti-dependence reg is now
          // inconsistent. Set the state as if it were dead.
          Classes[NewReg] = Classes[AntiDepReg];
          DefIndices[NewReg] = DefIndices[AntiDepReg];
          KillIndices[NewReg] = KillIndices[AntiDepReg];

          Classes[AntiDepReg] = 0;
          DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
          KillIndices[AntiDepReg] = ~0u;

          RegRefs.erase(AntiDepReg);
          Changed = true;
          LastNewReg[AntiDepReg] = NewReg;
          break;
        }
      }
    }
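    // Note on the liveness update below: a def that is tied to a use by the
    // two-address form (isRegReDefinedByTwoAddr) reads and rewrites the same
    // register, so it does not end a live range and is not treated as a
    // complete def here.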
    // Update liveness.
    // Proceeding upwards, registers that are defined but not used in this
    // instruction are now dead.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegReDefinedByTwoAddr(i)) continue;

      DefIndices[Reg] = Count;
      KillIndices[Reg] = ~0u;
      Classes[Reg] = 0;
      RegRefs.erase(Reg);
      // Repeat, for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        unsigned SubregReg = *Subreg;
        DefIndices[SubregReg] = Count;
        KillIndices[SubregReg] = ~0u;
        Classes[SubregReg] = 0;
        RegRefs.erase(SubregReg);
      }
      // Conservatively mark super-registers as unusable.
      for (const unsigned *Super = TRI->getSuperRegisters(Reg);
           *Super; ++Super) {
        unsigned SuperReg = *Super;
        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isUse()) continue;

      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, MI->getDesc(), i);

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      RegRefs.insert(std::make_pair(Reg, &MO));

      // It wasn't previously live, but now it is; this is a kill.
      if (KillIndices[Reg] == ~0u) {
        KillIndices[Reg] = Count;
        DefIndices[Reg] = ~0u;
      }
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        if (KillIndices[AliasReg] == ~0u) {
          KillIndices[AliasReg] = Count;
          DefIndices[AliasReg] = ~0u;
        }
      }
    }
  }
  assert(Count == ~0u && "Count mismatch!");

  return Changed;
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  if (SuccSU->NumPredsLeft == 0) {
    PendingQueue.push_back(SuccSU);
  }
}
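// Note: a successor whose predecessor count reaches zero goes into
// PendingQueue rather than straight into AvailableQueue, because its operands
// may still be in flight. ListScheduleTopDown moves it to AvailableQueue once
// CurCycle reaches the node's depth, i.e. once its predecessors' latencies
// have elapsed.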
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);

  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue. If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    // If there are no instructions available, don't try to issue anything,
    // and don't advance the hazard recognizer.
    if (AvailableQueue.empty()) {
      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
      continue;
    }

    SUnit *FoundSUnit = 0;

    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        FoundSUnit = CurSUnit;
        break;
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);

      // If this is a pseudo-op node, we don't want to increment the current
      // cycle.
      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
        ++CurCycle;
    } else if (!HasNoopHazards) {
      // Otherwise, we have a pipeline stall, but no other problem, just
      // advance the current cycle and try again.
      DOUT << "*** Advancing cycle, no work to do\n";
      HazardRec->AdvanceCycle();
      ++NumStalls;
      ++CurCycle;
    } else {
      // Otherwise, we have no instructions to issue and we have instructions
      // that will fault if we don't do this right. This is the case for
      // processors without pipeline interlocks and other cases.
      DOUT << "*** Emitting noop\n";
      HazardRec->EmitNoop();
      Sequence.push_back(0);   // NULL here means noop
      ++NumNoops;
      ++CurCycle;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}

//===----------------------------------------------------------------------===//
//  Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createPostRAScheduler() {
  return new PostRAScheduler();
}
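// Typical use (illustrative sketch, not specific to any one target): a
// codegen pipeline adds this pass after register allocation and
// prologue/epilogue insertion, e.g.
//   PM.add(createPostRAScheduler());
// so that scheduling sees the final physical-register form of the code.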