PostRASchedulerList.cpp revision 0dcda31cb40614d1adec10aad1a40a57ad581c69
//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "post-RA-sched"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
#include <map>
using namespace llvm;

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");

// -break-anti-dependencies: rename registers to break anti-dependence
// (write-after-read) edges on the critical path before scheduling.
static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies"),
                      cl::init(true), cl::Hidden);

// -avoid-hazards: use the SimpleHazardRecognizer below instead of the
// default (no-op) ScheduleHazardRecognizer.
static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
                            cl::desc("Enable simple hazard-avoidance"),
                            cl::init(true), cl::Hidden);

namespace {
  /// PostRAScheduler - The machine-function pass driver.  It walks each
  /// basic block and hands scheduling regions to SchedulePostRATDList.
  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(&ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      // Scheduling is intra-block, so dominators and loop info survive it.
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Post RA top-down list latency scheduler";
    }

    bool runOnMachineFunction(MachineFunction &Fn);
  };
  char PostRAScheduler::ID = 0;

  /// SchedulePostRATDList - The top-down list scheduler proper.  One
  /// instance is reused for every region in the function; Schedule() is
  /// invoked once per region by the ScheduleDAG Run/EmitSchedule protocol.
  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands have
    /// been issued, but their results are not ready yet (due to the latency of
    /// the operation). Once the operands becomes available, the instruction is
    /// added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// Topo - A topological ordering for SUnits.
    ScheduleDAGTopologicalSort Topo;

    /// AllocatableSet - The set of allocatable registers.
    /// We'll be ignoring anti-dependencies on non-allocatable registers,
    /// because they may not be safe to break.
    const BitVector AllocatableSet;

    /// HazardRec - The hazard recognizer to use.  Owned by this class
    /// (deleted in the destructor).
    ScheduleHazardRecognizer *HazardRec;

  public:
    SchedulePostRATDList(MachineFunction &MF,
                         const MachineLoopInfo &MLI,
                         const MachineDominatorTree &MDT,
                         ScheduleHazardRecognizer *HR)
      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
        AllocatableSet(TRI->getAllocatableSet(MF)),
        HazardRec(HR) {}

    ~SchedulePostRATDList() {
      delete HazardRec;
    }

    /// Schedule - Schedule the current region: build the dependence graph,
    /// optionally break anti-dependencies, and list-schedule top-down.
    void Schedule();

  private:
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();
    bool BreakAntiDependencies();
  };

  /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
  /// a coarse classification and attempts to avoid grouping instructions
  /// of a given class too densely together.
  class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
    /// Class - A simple classification for SUnits.
    enum Class {
      Other, Load, Store
    };

    /// Window - The Class values of the most recently issued
    /// instructions.  Index 0 is the oldest entry, the last index the
    /// most recent.
    Class Window[8];

    /// getClass - Classify the given SUnit as a load, store, or other.
    Class getClass(const SUnit *SU) {
      const MachineInstr *MI = SU->getInstr();
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.mayLoad())
        return Load;
      if (TID.mayStore())
        return Store;
      return Other;
    }

    /// Step - Rotate the existing entries in Window and insert the
    /// given class value in position as the most recent.
    void Step(Class C) {
      std::copy(Window+1, array_endof(Window), Window);
      Window[array_lengthof(Window)-1] = C;
    }

  public:
    // Value-initialize Window so every slot starts as Other (0).
    SimpleHazardRecognizer() : Window() {}

    virtual HazardType getHazardType(SUnit *SU) {
      Class C = getClass(SU);
      if (C == Other)
        return NoHazard;
      // Score recent same-class instructions, weighting newer entries
      // more heavily (i+1); a high score means the class is issuing too
      // densely, so report a (non-noop) hazard to delay this unit.
      unsigned Score = 0;
      for (unsigned i = 0; i != array_lengthof(Window); ++i)
        if (Window[i] == C)
          Score += i + 1;
      if (Score > array_lengthof(Window) * 2)
        return Hazard;
      return NoHazard;
    }

    virtual void EmitInstruction(SUnit *SU) {
      Step(getClass(SU));
    }

    virtual void AdvanceCycle() {
      // An idle cycle dilutes the window with an Other entry.
      Step(Other);
    }
  };
}

bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  DOUT << "PostRAScheduler\n";

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
    new SimpleHazardRecognizer :
    new ScheduleHazardRecognizer();

  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);

  // Loop over all of the basic blocks
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    // Walk the block bottom-up; Current is the (exclusive) end of the
    // region being accumulated.  Scheduling barriers become one-past-the
    // -start of the next region via next(I).
    MachineBasicBlock::iterator Current = MBB->end(), Top = MBB->begin();
    for (MachineBasicBlock::iterator I = Current; I != Top; ) {
      MachineInstr *MI = --I;
      if (MI->getDesc().isTerminator() || MI->isLabel()) {
        Scheduler.Run(0, MBB, next(I), Current);
        Scheduler.EmitSchedule();
        Current = I;
      }
    }

    // Schedule the final region, from the top of the block to the last
    // barrier found (or the whole block if there was none).
    Scheduler.Run(0, MBB, Top, Current);
    Scheduler.EmitSchedule();
  }

  return true;
}

/// Schedule - Schedule the DAG using list scheduling.
void SchedulePostRATDList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  // Build the scheduling graph.
  BuildSchedGraph();

  if (EnableAntiDepBreaking) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      SUnits.clear();
      BuildSchedGraph();
    }
  }

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}

/// getInstrOperandRegClass - Return register class of the operand of an
/// instruction of the specified TargetInstrDesc, or NULL if the operand
/// index is out of range or the operand has no fixed register class.
static const TargetRegisterClass*
getInstrOperandRegClass(const TargetRegisterInfo *TRI,
                        const TargetInstrInfo *TII, const TargetInstrDesc &II,
                        unsigned Op) {
  if (Op >= II.getNumOperands())
    return NULL;
  if (II.OpInfo[Op].isLookupPtrRegClass())
    return TII->getPointerRegClass();
  return TRI->getRegClass(II.OpInfo[Op].RegClass);
}

/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path, i.e. the predecessor edge with the greatest
/// depth-plus-latency, or null if SU has no predecessors.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}

/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.  Returns true if
/// any renaming was performed (in which case the caller must rebuild the
/// dependence graph).
///
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (SUnits.empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

  DOUT << "Critical path has total latency "
       << (Max->getDepth() + Max->Latency) << "\n";

  // Track progress along the critical path through the SUnit graph as we walk
  // the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // For live regs that are only used in one register class in a live range,
  // the register class. If the register is not live, the corresponding value
  // is null. If the register is live but used in multiple register classes,
  // the corresponding value is -1 casted to a pointer.
  const TargetRegisterClass *
    Classes[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Map registers to all their references within a live range.
  std::multimap<unsigned, MachineOperand *> RegRefs;

  // The index of the most recent kill (proceeding bottom-up), or ~0u if
  // the register is not live.
  unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(KillIndices, array_endof(KillIndices), ~0u);
  // The index of the most recent complete def (proceeding bottom up), or ~0u
  // if the register is live.
  // Invariant: for each register, exactly one of KillIndices/DefIndices
  // is ~0u at any time (checked by asserts below).
  unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(DefIndices, array_endof(DefIndices), BB->size());

  // Determine the live-out physregs for this block.
  if (BB->back().getDesc().isReturn())
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      // Live-outs may never be renamed: mark the class as "multiple".
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = ~0u;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = ~0u;
      }
    }
  else
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = ~0u;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = ~0u;
        }
      }

  // Consider callee-saved registers as live-out, since we're running after
  // prologue/epilogue insertion so there's no way to add additional
  // saved registers.
  //
  // TODO: If the callee saves and restores these, then we can potentially
  // use them between the save and the restore. To do that, we could scan
  // the exit blocks to see which of these registers are defined.
  // Alternatively, callee-saved registers that aren't saved and restored
  // could be marked live-in in every block.
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = ~0u;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = ~0u;
    }
  }

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free. This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break. To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = SUnits.size() - 1;
  // Count decrements past zero to ~0u on the final iteration; the assert
  // after the loop relies on that wrap-around.
  for (MachineBasicBlock::iterator I = End, E = Begin;
       I != E; --Count) {
    MachineInstr *MI = --I;

    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
    // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
    // is left behind appearing to clobber the super-register, while the
    // subregister needs to remain live. So we just ignore them.
    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
      continue;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          // Don't break anti-dependencies on non-allocatable registers.
          if (!AllocatableSet.test(AntiDepReg))
            AntiDepReg = 0;
          else {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        // Advance bottom-up along the critical path regardless of edge kind.
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    // Scan the register operands for this instruction and update
    // Classes and RegRefs.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, TII, MI->getDesc(), i);

      // If this instruction has a use of AntiDepReg, breaking it
      // is invalid.
      if (MO.isUse() && AntiDepReg == Reg)
        AntiDepReg = 0;

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      // Now check for aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        // If an alias of the reg is used during the live range, give up.
        // Note that this allows us to skip checking if AntiDepReg
        // overlaps with any of the aliases, among other things.
        unsigned AliasReg = *Alias;
        if (Classes[AliasReg]) {
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        }
      }

      // If we're still willing to consider this register, note the reference.
      if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
        RegRefs.insert(std::make_pair(Reg, &MO));
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ?
      Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
           RE = RC->allocation_order_end(MF); R != RE; ++R) {
        unsigned NewReg = *R;
        // Don't replace a register with itself.
        if (NewReg == AntiDepReg) continue;
        // Don't replace a register with one that was recently used to repair
        // an anti-dependence with this AntiDepReg, because that would
        // re-introduce that anti-dependence.
        if (NewReg == LastNewReg[AntiDepReg]) continue;
        // If NewReg is dead and NewReg's most recent def is not before
        // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
        assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");
        assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for NewReg!");
        if (KillIndices[NewReg] == ~0u &&
            Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
            KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
          DOUT << "Breaking anti-dependence edge on "
               << TRI->getName(AntiDepReg)
               << " with " << RegRefs.count(AntiDepReg) << " references"
               << " using " << TRI->getName(NewReg) << "!\n";

          // Update the references to the old register to refer to the new
          // register.
          std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                    std::multimap<unsigned, MachineOperand *>::iterator>
             Range = RegRefs.equal_range(AntiDepReg);
          for (std::multimap<unsigned, MachineOperand *>::iterator
               Q = Range.first, QE = Range.second; Q != QE; ++Q)
            Q->second->setReg(NewReg);

          // We just went back in time and modified history; the
          // liveness information for the anti-dependence reg is now
          // inconsistent. Set the state as if it were dead.
          Classes[NewReg] = Classes[AntiDepReg];
          DefIndices[NewReg] = DefIndices[AntiDepReg];
          KillIndices[NewReg] = KillIndices[AntiDepReg];

          Classes[AntiDepReg] = 0;
          DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
          KillIndices[AntiDepReg] = ~0u;

          RegRefs.erase(AntiDepReg);
          Changed = true;
          LastNewReg[AntiDepReg] = NewReg;
          break;
        }
      }
    }

    // Update liveness.
    // Proceeding upwards, registers that are defed but not used in this
    // instruction are now dead.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegReDefinedByTwoAddr(i)) continue;

      DefIndices[Reg] = Count;
      KillIndices[Reg] = ~0u;
      Classes[Reg] = 0;
      RegRefs.erase(Reg);
      // Repeat, for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        unsigned SubregReg = *Subreg;
        DefIndices[SubregReg] = Count;
        KillIndices[SubregReg] = ~0u;
        Classes[SubregReg] = 0;
        RegRefs.erase(SubregReg);
      }
      // Conservatively mark super-registers as unusable.
      for (const unsigned *Super = TRI->getSuperRegisters(Reg);
           *Super; ++Super) {
        unsigned SuperReg = *Super;
        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isUse()) continue;

      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, TII, MI->getDesc(), i);

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      RegRefs.insert(std::make_pair(Reg, &MO));

      // It wasn't previously live but now it is, this is a kill.
      if (KillIndices[Reg] == ~0u) {
        KillIndices[Reg] = Count;
        DefIndices[Reg] = ~0u;
      }
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        if (KillIndices[AliasReg] == ~0u) {
          KillIndices[AliasReg] = Count;
          DefIndices[AliasReg] = ~0u;
        }
      }
    }
  }
  assert(Count == ~0u && "Count mismatch!");

  return Changed;
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  // NOTE(review): if SUnit::NumPredsLeft is an unsigned type, this < 0
  // check can never fire (it would wrap instead) — confirm against the
  // SUnit declaration in ScheduleDAG.h.
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  if (SuccSU->NumPredsLeft == 0) {
    PendingQueue.push_back(SuccSU);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);

  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue.  If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        // Swap-with-back erase; re-examine the swapped-in element by
        // stepping i (and the cached size e) back one.
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    // If there are no instructions available, don't try to issue anything, and
    // don't advance the hazard recognizer.
    if (AvailableQueue.empty()) {
      // Jump straight to the earliest cycle at which something becomes ready.
      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
      continue;
    }

    SUnit *FoundSUnit = 0;

    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        FoundSUnit = CurSUnit;
        break;
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);

      // If this is a pseudo-op node, we don't want to increment the current
      // cycle.
      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
        ++CurCycle;
    } else if (!HasNoopHazards) {
      // Otherwise, we have a pipeline stall, but no other problem, just advance
      // the current cycle and try again.
      DOUT << "*** Advancing cycle, no work to do\n";
      HazardRec->AdvanceCycle();
      ++NumStalls;
      ++CurCycle;
    } else {
      // Otherwise, we have no instructions to issue and we have instructions
      // that will fault if we don't do this right.  This is the case for
      // processors without pipeline interlocks and other cases.
      DOUT << "*** Emitting noop\n";
      HazardRec->EmitNoop();
      Sequence.push_back(0);   // NULL here means noop
      ++NumNoops;
      ++CurCycle;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createPostRAScheduler() {
  return new PostRAScheduler();
}