PostRASchedulerList.cpp revision 99603bb2e0d81b55ef3f4d219a3fecc067ced3b2
//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "post-RA-sched"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
#include <map>
using namespace llvm;

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");

static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies"),
                      cl::init(true), cl::Hidden);

static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
                            cl::desc("Enable simple hazard-avoidance"),
                            cl::init(true), cl::Hidden);

namespace {
  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(&ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Post RA top-down list latency scheduler";
    }

    bool runOnMachineFunction(MachineFunction &Fn);
  };
  char PostRAScheduler::ID = 0;

  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands
    /// have been issued, but their results are not ready yet (due to the
    /// latency of the operation). Once the operands become available, the
    /// instruction is added to the AvailableQueue.
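    ///
    /// For example (a sketch of the flow, not additional policy): if an
    /// SUnit issues at cycle 10 and its edge to a successor has latency 3,
    /// ReleaseSucc raises the successor's depth to at least 13; the
    /// successor then waits here until CurCycle reaches 13, at which point
    /// ListScheduleTopDown moves it to the AvailableQueue.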
    std::vector<SUnit*> PendingQueue;

    /// Topo - A topological ordering for SUnits.
    ScheduleDAGTopologicalSort Topo;

    /// AllocatableSet - The set of allocatable registers.
    /// We'll be ignoring anti-dependencies on non-allocatable registers,
    /// because they may not be safe to break.
    const BitVector AllocatableSet;

    /// HazardRec - The hazard recognizer to use.
    ScheduleHazardRecognizer *HazardRec;

  public:
    SchedulePostRATDList(MachineFunction &MF,
                         const MachineLoopInfo &MLI,
                         const MachineDominatorTree &MDT,
                         ScheduleHazardRecognizer *HR)
      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
        AllocatableSet(TRI->getAllocatableSet(MF)),
        HazardRec(HR) {}

    ~SchedulePostRATDList() {
      delete HazardRec;
    }

    void Schedule();

  private:
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();
    bool BreakAntiDependencies();
  };

  /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
  /// a coarse classification and attempts to keep instructions of a given
  /// class from being grouped too densely together.
  class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
    /// Class - A simple classification for SUnits.
    enum Class {
      Other, Load, Store
    };

    /// Window - The Class values of the most recently issued
    /// instructions.
    Class Window[8];

    /// getClass - Classify the given SUnit.
    Class getClass(const SUnit *SU) {
      const MachineInstr *MI = SU->getInstr();
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.mayLoad())
        return Load;
      if (TID.mayStore())
        return Store;
      return Other;
    }

    /// Step - Rotate the existing entries in Window and insert the
    /// given class value as the most recent entry.
    void Step(Class C) {
      std::copy(Window+1, array_endof(Window), Window);
      Window[array_lengthof(Window)-1] = C;
    }

  public:
    SimpleHazardRecognizer() : Window() {}

    virtual HazardType getHazardType(SUnit *SU) {
      Class C = getClass(SU);
      if (C == Other)
        return NoHazard;
      unsigned Score = 0;
      for (unsigned i = 0; i != array_lengthof(Window); ++i)
        if (Window[i] == C)
          Score += i + 1;
      if (Score > array_lengthof(Window) * 2)
        return Hazard;
      return NoHazard;
    }
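    // A worked example of the scoring above (illustrative numbers only):
    // a slot at index i contributes i+1, so with the 8-entry window the
    // threshold is 8*2 = 16. If the three most recently issued
    // instructions were all loads (slots 5, 6, 7), another load scores
    // 6+7+8 = 21 > 16 and is reported as a Hazard; if the same three
    // loads were spread out (say slots 0, 3, 6), it scores 1+4+7 = 12
    // and may issue.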

    virtual void EmitInstruction(SUnit *SU) {
      Step(getClass(SU));
    }

    virtual void AdvanceCycle() {
      Step(Other);
    }
  };
}

bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  DOUT << "PostRAScheduler\n";

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
    new SimpleHazardRecognizer :
    new ScheduleHazardRecognizer();

  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {

    Scheduler.Run(0, MBB);

    Scheduler.EmitSchedule();
  }

  return true;
}

/// Schedule - Schedule the DAG using list scheduling.
void SchedulePostRATDList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  // Build the scheduling graph.
  BuildSchedGraph();

  if (EnableAntiDepBreaking) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      SUnits.clear();
      BuildSchedGraph();
    }
  }

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}

/// getInstrOperandRegClass - Return the register class of the operand of an
/// instruction of the specified TargetInstrDesc.
static const TargetRegisterClass*
getInstrOperandRegClass(const TargetRegisterInfo *TRI,
                        const TargetInstrInfo *TII, const TargetInstrDesc &II,
                        unsigned Op) {
  if (Op >= II.getNumOperands())
    return NULL;
  if (II.OpInfo[Op].isLookupPtrRegClass())
    return TII->getPointerRegClass();
  return TRI->getRegClass(II.OpInfo[Op].RegClass);
}

/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}

/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (BB->empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

  DOUT << "Critical path has total latency "
       << (Max ? Max->getDepth() + Max->Latency : 0) << "\n";

  // Track progress along the critical path through the SUnit graph as we walk
  // the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // For live regs that are used in only one register class within a live
  // range, the register class. If the register is not live, the corresponding
  // value is null. If the register is live but used in multiple register
  // classes, the corresponding value is -1 cast to a pointer.
  const TargetRegisterClass *
    Classes[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Map registers to all their references within a live range.
  std::multimap<unsigned, MachineOperand *> RegRefs;

  // The index of the most recent kill (proceeding bottom-up), or ~0u if
  // the register is not live.
  unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(KillIndices, array_endof(KillIndices), ~0u);
  // The index of the most recent complete def (proceeding bottom-up), or ~0u
  // if the register is live.
  unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(DefIndices, array_endof(DefIndices), BB->size());
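  // A sketch of the bookkeeping these two arrays maintain (the indices are
  // hypothetical): instructions are numbered 0 at the top of the block
  // through BB->size()-1 at the bottom, and the walk below visits them
  // bottom-up. For every register, exactly one of KillIndices[Reg] and
  // DefIndices[Reg] is ~0u at any time. Seeing a use of R at index 12 while
  // R is dead sets KillIndices[R] = 12 and DefIndices[R] = ~0u (R is now
  // live); later seeing R's def at index 9 sets DefIndices[R] = 9 and
  // KillIndices[R] = ~0u (R is dead above index 9).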

  // Determine the live-out physregs for this block.
  if (BB->back().getDesc().isReturn())
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = ~0u;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = ~0u;
      }
    }
  else
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = ~0u;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = ~0u;
        }
      }

  // Consider callee-saved registers as live-out, since we're running after
  // prologue/epilogue insertion so there's no way to add additional
  // saved registers.
  //
  // TODO: If the callee saves and restores these, then we can potentially
  // use them between the save and the restore. To do that, we could scan
  // the exit blocks to see which of these registers are defined.
  // Alternatively, callee-saved registers that aren't saved and restored
  // could be marked live-in in every block.
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = ~0u;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = ~0u;
    }
  }

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free. This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break. To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = BB->size() - 1;
  for (MachineBasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend();
       I != E; ++I, --Count) {
    MachineInstr *MI = &*I;

    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
    // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
    // is left behind appearing to clobber the super-register, while the
    // subregister needs to remain live. So we just ignore them.
    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
      continue;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          // Don't break anti-dependencies on non-allocatable registers.
          if (AllocatableSet.test(AntiDepReg)) {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
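            // (The condition below encodes both rules in one ternary: for
            // an edge to NextSU, anything other than the anti-dependence on
            // AntiDepReg itself disqualifies; for an edge to any other
            // SUnit, a data dependence on AntiDepReg disqualifies.)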
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    // Scan the register operands for this instruction and update
    // Classes and RegRefs.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, TII, MI->getDesc(), i);

      // If this instruction has a use of AntiDepReg, breaking it
      // is invalid.
      if (MO.isUse() && AntiDepReg == Reg)
        AntiDepReg = 0;

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      // Now check for aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        // If an alias of the reg is used during the live range, give up.
        // Note that this allows us to skip checking if AntiDepReg
        // overlaps with any of the aliases, among other things.
        unsigned AliasReg = *Alias;
        if (Classes[AliasReg]) {
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        }
      }

      // If we're still willing to consider this register, note the reference.
      if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
        RegRefs.insert(std::make_pair(Reg, &MO));
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
           RE = RC->allocation_order_end(MF); R != RE; ++R) {
        unsigned NewReg = *R;
        // Don't replace a register with itself.
        if (NewReg == AntiDepReg) continue;
        // Don't replace a register with one that was recently used to repair
        // an anti-dependence with this AntiDepReg, because that would
        // re-introduce that anti-dependence.
        if (NewReg == LastNewReg[AntiDepReg]) continue;
        // If NewReg is dead and NewReg's most recent def is not before
        // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
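        // A hypothetical index trace to illustrate the check below: with
        // instructions numbered 0 (top) through BB->size()-1 (bottom),
        // suppose AntiDepReg was last killed at index 12
        // (KillIndices[AntiDepReg] == 12) and NewReg is dead with its most
        // recent def at index 15 (DefIndices[NewReg] == 15), i.e. below the
        // kill. Then 12 <= 15 holds: NewReg isn't redefined anywhere inside
        // the live range being renamed, so the rename is safe.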
        assert(((KillIndices[AntiDepReg] == ~0u) !=
                (DefIndices[AntiDepReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");
        assert(((KillIndices[NewReg] == ~0u) !=
                (DefIndices[NewReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for NewReg!");
        if (KillIndices[NewReg] == ~0u &&
            Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
            KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
          DOUT << "Breaking anti-dependence edge on "
               << TRI->getName(AntiDepReg)
               << " with " << RegRefs.count(AntiDepReg) << " references"
               << " using " << TRI->getName(NewReg) << "!\n";

          // Update the references to the old register to refer to the new
          // register.
          std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                    std::multimap<unsigned, MachineOperand *>::iterator>
             Range = RegRefs.equal_range(AntiDepReg);
          for (std::multimap<unsigned, MachineOperand *>::iterator
               Q = Range.first, QE = Range.second; Q != QE; ++Q)
            Q->second->setReg(NewReg);

          // We just went back in time and modified history; the
          // liveness information for the anti-dependence reg is now
          // inconsistent. Set the state as if it were dead.
          Classes[NewReg] = Classes[AntiDepReg];
          DefIndices[NewReg] = DefIndices[AntiDepReg];
          KillIndices[NewReg] = KillIndices[AntiDepReg];

          Classes[AntiDepReg] = 0;
          DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
          KillIndices[AntiDepReg] = ~0u;

          RegRefs.erase(AntiDepReg);
          Changed = true;
          LastNewReg[AntiDepReg] = NewReg;
          break;
        }
      }
    }

    // Update liveness.
    // Proceeding upwards, registers that are defined but not used in this
    // instruction are now dead.
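    // For instance (x86 used purely as an illustration): a def of AX also
    // defines its sub-registers AH and AL, so they get the same dead-above
    // treatment below, while the super-register EAX is only partially
    // clobbered and is conservatively marked unusable.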
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegReDefinedByTwoAddr(i)) continue;

      DefIndices[Reg] = Count;
      KillIndices[Reg] = ~0u;
      Classes[Reg] = 0;
      RegRefs.erase(Reg);
      // Repeat, for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        unsigned SubregReg = *Subreg;
        DefIndices[SubregReg] = Count;
        KillIndices[SubregReg] = ~0u;
        Classes[SubregReg] = 0;
        RegRefs.erase(SubregReg);
      }
      // Conservatively mark super-registers as unusable.
      for (const unsigned *Super = TRI->getSuperRegisters(Reg);
           *Super; ++Super) {
        unsigned SuperReg = *Super;
        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isUse()) continue;

      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, TII, MI->getDesc(), i);

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      RegRefs.insert(std::make_pair(Reg, &MO));

      // If the register wasn't previously live, it is now; this is a kill.
      if (KillIndices[Reg] == ~0u) {
        KillIndices[Reg] = Count;
        DefIndices[Reg] = ~0u;
      }
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        if (KillIndices[AliasReg] == ~0u) {
          KillIndices[AliasReg] = Count;
          DefIndices[AliasReg] = ~0u;
        }
      }
    }
  }
  assert(Count == ~0u && "Count mismatch!");

  return Changed;
}

//===----------------------------------------------------------------------===//
//                           Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  // Check for over-release before decrementing; a post-decrement test for
  // a negative value would never fire if NumPredsLeft is unsigned.
  if (SuccSU->NumPredsLeft == 0) {
    cerr << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  if (SuccSU->NumPredsLeft == 0) {
    PendingQueue.push_back(SuccSU);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);

  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }
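
  // In outline, the loop below repeats three steps (a summary of the code,
  // not additional policy):
  //  - Move pending SUnits whose depth has been reached into the
  //    available queue.
  //  - Pop candidates from the available queue until one passes the
  //    hazard recognizer.
  //  - Schedule the found candidate; otherwise either record a stall and
  //    advance the cycle, or, when only noop hazards remain, emit an
  //    explicit noop.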
  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue. If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    // If there are no instructions available, don't try to issue anything, and
    // don't advance the hazard recognizer.
    if (AvailableQueue.empty()) {
      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
      continue;
    }

    SUnit *FoundSUnit = 0;

    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        FoundSUnit = CurSUnit;
        break;
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);

      // If this is a pseudo-op node, we don't want to increment the current
      // cycle.
      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
        ++CurCycle;
    } else if (!HasNoopHazards) {
      // Otherwise, we have a pipeline stall, but no other problem; just
      // advance the current cycle and try again.
      DOUT << "*** Advancing cycle, no work to do\n";
      HazardRec->AdvanceCycle();
      ++NumStalls;
      ++CurCycle;
    } else {
      // Otherwise, we have no instructions to issue and we have instructions
      // that will fault if we don't do this right. This is the case for
      // processors without pipeline interlocks and other cases.
      DOUT << "*** Emitting noop\n";
      HazardRec->EmitNoop();
      Sequence.push_back(0);   // NULL here means noop
      ++NumNoops;
      ++CurCycle;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createPostRAScheduler() {
  return new PostRAScheduler();
}
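
// A sketch of typical use (hypothetical client code; the exact pipeline
// position is a target decision): the pass is meant to run after register
// allocation and prologue/epilogue insertion, e.g.
//
//   PM.add(createPostRAScheduler());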