PostRASchedulerList.cpp revision f7119393a97c2a10757084b6bc186380f8c19a73
//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "post-RA-sched"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
#include <map>
using namespace llvm;

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");

// Command-line knob: enable/disable the critical-path anti-dependence
// breaking performed in SchedulePostRATDList::BreakAntiDependencies.
static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies"),
                      cl::init(true), cl::Hidden);

// Command-line knob: when true, use SimpleHazardRecognizer below instead of
// the base (no-op) ScheduleHazardRecognizer.
static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
                      cl::desc("Enable simple hazard-avoidance"),
                      cl::init(true), cl::Hidden);

namespace {
  /// PostRAScheduler - The MachineFunctionPass wrapper that drives post-RA
  /// top-down list scheduling over every basic block of a function.
  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(&ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Post RA top-down list latency scheduler";
    }

    bool runOnMachineFunction(MachineFunction &Fn);
  };
  char PostRAScheduler::ID = 0;

  /// SchedulePostRATDList - The scheduler implementation: builds the
  /// dependence DAG for a run of instructions, optionally breaks
  /// anti-dependencies along the critical path, then list-schedules top-down.
  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands have
    /// been issued, but their results are not ready yet (due to the latency of
    /// the operation).  Once the operands become available, the instruction is
    /// added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// Topo - A topological ordering for SUnits.
    ScheduleDAGTopologicalSort Topo;

    /// AllocatableSet - The set of allocatable registers.
    /// We'll be ignoring anti-dependencies on non-allocatable registers,
    /// because they may not be safe to break.
    const BitVector AllocatableSet;

    /// HazardRec - The hazard recognizer to use.  Owned by this object
    /// (deleted in the destructor).
    ScheduleHazardRecognizer *HazardRec;

  public:
    SchedulePostRATDList(MachineFunction &MF,
                         const MachineLoopInfo &MLI,
                         const MachineDominatorTree &MDT,
                         ScheduleHazardRecognizer *HR)
      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
        AllocatableSet(TRI->getAllocatableSet(MF)),
        HazardRec(HR) {}

    // Takes ownership of the hazard recognizer passed to the constructor.
    ~SchedulePostRATDList() {
      delete HazardRec;
    }

    void Schedule();

  private:
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();
    bool BreakAntiDependencies();
  };

  /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
  /// a coarse classification and attempts to avoid that instructions of
  /// a given class aren't grouped too densely together.
  class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
    /// Class - A simple classification for SUnits.
    enum Class {
      Other, Load, Store
    };

    /// Window - The Class values of the most recently issued
    /// instructions.  Index 0 is the oldest entry; the last index is
    /// the most recent.
    Class Window[8];

    /// getClass - Classify the given SUnit as Load, Store, or Other
    /// based on its instruction descriptor flags.
    Class getClass(const SUnit *SU) {
      const MachineInstr *MI = SU->getInstr();
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.mayLoad())
        return Load;
      if (TID.mayStore())
        return Store;
      return Other;
    }

    /// Step - Rotate the existing entries in Window and insert the
    /// given class value in position as the most recent.
    void Step(Class C) {
      std::copy(Window+1, array_endof(Window), Window);
      Window[array_lengthof(Window)-1] = C;
    }

  public:
    // Window is value-initialized, i.e. all slots start out as Other.
    SimpleHazardRecognizer() : Window() {}

    virtual HazardType getHazardType(SUnit *SU) {
      Class C = getClass(SU);
      if (C == Other)
        return NoHazard;
      // Score the candidate by how many recently-issued instructions share
      // its class, weighting more recent window slots more heavily.
      unsigned Score = 0;
      for (unsigned i = 0; i != array_lengthof(Window); ++i)
        if (Window[i] == C)
          Score += i + 1;
      if (Score > array_lengthof(Window) * 2)
        return Hazard;
      return NoHazard;
    }

    virtual void EmitInstruction(SUnit *SU) {
      Step(getClass(SU));
    }

    virtual void AdvanceCycle() {
      // A stall cycle issues nothing; record it as Other so the window
      // ages out over time.
      Step(Other);
    }
  };
}

/// runOnMachineFunction - For each basic block, schedule every maximal run of
/// instructions that is not interrupted by a label or a terminator.
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  DOUT << "PostRAScheduler\n";

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  // The scheduler takes ownership of HR and deletes it.
  ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
                                 new SimpleHazardRecognizer :
                                 new ScheduleHazardRecognizer();

  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);

  // Loop over all of the basic blocks
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    MachineBasicBlock::iterator Current = MBB->begin(), End = MBB->end();
    for (MachineBasicBlock::iterator MI = Current; MI != End; ++MI)
      if (MI->getDesc().isTerminator() || MI->isLabel()) {
        Scheduler.Run(0, MBB, Current, MI);
        Scheduler.EmitSchedule();
        Current = next(MI);
      }

    // Schedule the trailing run of instructions, if any.
    Scheduler.Run(0, MBB, Current, End);
    Scheduler.EmitSchedule();
  }

  return true;
}

/// Schedule - Schedule the DAG using list scheduling.
void SchedulePostRATDList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  // Build the scheduling graph.
  BuildSchedGraph();

  if (EnableAntiDepBreaking) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      SUnits.clear();
      BuildSchedGraph();
    }
  }

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}

/// getInstrOperandRegClass - Return register class of the operand of an
/// instruction of the specified TargetInstrDesc, or null if the operand
/// index is out of range for the descriptor's declared operands.
static const TargetRegisterClass*
getInstrOperandRegClass(const TargetRegisterInfo *TRI,
                        const TargetInstrInfo *TII, const TargetInstrDesc &II,
                        unsigned Op) {
  if (Op >= II.getNumOperands())
    return NULL;
  if (II.OpInfo[Op].isLookupPtrRegClass())
    return TII->getPointerRegClass();
  return TRI->getRegClass(II.OpInfo[Op].RegClass);
}

/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path, i.e. the predecessor edge with the greatest total
/// latency, or null if SU has no predecessors.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}

/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.  Returns true if
/// any operands were rewritten (the caller must then rebuild the DAG).
///
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (SUnits.empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

  DOUT << "Critical path has total latency "
       << (Max->getDepth() + Max->Latency) << "\n";

  // Track progress along the critical path through the SUnit graph as we walk
  // the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // For live regs that are only used in one register class in a live range,
  // the register class. If the register is not live, the corresponding value
  // is null. If the register is live but used in multiple register classes,
  // the corresponding value is -1 casted to a pointer.
  const TargetRegisterClass *
    Classes[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Map registers to all their references within a live range.
  std::multimap<unsigned, MachineOperand *> RegRefs;

  // The index of the most recent kill (proceeding bottom-up), or ~0u if
  // the register is not live.
  unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(KillIndices, array_endof(KillIndices), ~0u);
  // The index of the most recent complete def (proceeding bottom up), or ~0u
  // if the register is live.
  unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(DefIndices, array_endof(DefIndices), BB->size());

  // Determine the live-out physregs for this block.
  if (BB->back().getDesc().isReturn())
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = ~0u;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = ~0u;
      }
    }
  else
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = ~0u;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = ~0u;
        }
      }

  // Consider callee-saved registers as live-out, since we're running after
  // prologue/epilogue insertion so there's no way to add additional
  // saved registers.
  //
  // TODO: If the callee saves and restores these, then we can potentially
  // use them between the save and the restore. To do that, we could scan
  // the exit blocks to see which of these registers are defined.
  // Alternatively, callee-saved registers that aren't saved and restored
  // could be marked live-in in every block.
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = ~0u;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = ~0u;
    }
  }

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free.  This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break.  To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = BB->size() - 1;
  for (MachineBasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend();
       I != E; ++I, --Count) {
    MachineInstr *MI = &*I;

    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
    // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
    // is left behind appearing to clobber the super-register, while the
    // subregister needs to remain live. So we just ignore them.
    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
      continue;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          // Don't break anti-dependencies on non-allocatable registers.
          if (!AllocatableSet.test(AntiDepReg))
            AntiDepReg = 0;
          else {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        // Advance one step along the critical path regardless of whether
        // this edge was breakable.
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    // Scan the register operands for this instruction and update
    // Classes and RegRefs.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, TII, MI->getDesc(), i);

      // If this instruction has a use of AntiDepReg, breaking it
      // is invalid.
      if (MO.isUse() && AntiDepReg == Reg)
        AntiDepReg = 0;

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      // Now check for aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        // If an alias of the reg is used during the live range, give up.
        // Note that this allows us to skip checking if AntiDepReg
        // overlaps with any of the aliases, among other things.
        unsigned AliasReg = *Alias;
        if (Classes[AliasReg]) {
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        }
      }

      // If we're still willing to consider this register, note the reference.
      if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
        RegRefs.insert(std::make_pair(Reg, &MO));
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
           RE = RC->allocation_order_end(MF); R != RE; ++R) {
        unsigned NewReg = *R;
        // Don't replace a register with itself.
        if (NewReg == AntiDepReg) continue;
        // Don't replace a register with one that was recently used to repair
        // an anti-dependence with this AntiDepReg, because that would
        // re-introduce that anti-dependence.
        if (NewReg == LastNewReg[AntiDepReg]) continue;
        // If NewReg is dead and NewReg's most recent def is not before
        // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
        assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");
        assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for NewReg!");
        if (KillIndices[NewReg] == ~0u &&
            Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
            KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
          DOUT << "Breaking anti-dependence edge on "
               << TRI->getName(AntiDepReg)
               << " with " << RegRefs.count(AntiDepReg) << " references"
               << " using " << TRI->getName(NewReg) << "!\n";

          // Update the references to the old register to refer to the new
          // register.
          std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                    std::multimap<unsigned, MachineOperand *>::iterator>
             Range = RegRefs.equal_range(AntiDepReg);
          for (std::multimap<unsigned, MachineOperand *>::iterator
               Q = Range.first, QE = Range.second; Q != QE; ++Q)
            Q->second->setReg(NewReg);

          // We just went back in time and modified history; the
          // liveness information for the anti-dependence reg is now
          // inconsistent. Set the state as if it were dead.
          Classes[NewReg] = Classes[AntiDepReg];
          DefIndices[NewReg] = DefIndices[AntiDepReg];
          KillIndices[NewReg] = KillIndices[AntiDepReg];

          Classes[AntiDepReg] = 0;
          DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
          KillIndices[AntiDepReg] = ~0u;

          RegRefs.erase(AntiDepReg);
          Changed = true;
          LastNewReg[AntiDepReg] = NewReg;
          break;
        }
      }
    }

    // Update liveness.
    // Proceeding upwards, registers that are defed but not used in this
    // instruction are now dead.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegReDefinedByTwoAddr(i)) continue;

      DefIndices[Reg] = Count;
      KillIndices[Reg] = ~0u;
      Classes[Reg] = 0;
      RegRefs.erase(Reg);
      // Repeat, for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        unsigned SubregReg = *Subreg;
        DefIndices[SubregReg] = Count;
        KillIndices[SubregReg] = ~0u;
        Classes[SubregReg] = 0;
        RegRefs.erase(SubregReg);
      }
      // Conservatively mark super-registers as unusable.
      for (const unsigned *Super = TRI->getSuperRegisters(Reg);
           *Super; ++Super) {
        unsigned SuperReg = *Super;
        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }
    // Second operand scan: uses extend live ranges upward.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isUse()) continue;

      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, TII, MI->getDesc(), i);

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      RegRefs.insert(std::make_pair(Reg, &MO));

      // It wasn't previously live but now it is, this is a kill.
      if (KillIndices[Reg] == ~0u) {
        KillIndices[Reg] = Count;
        DefIndices[Reg] = ~0u;
      }
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        if (KillIndices[AliasReg] == ~0u) {
          KillIndices[AliasReg] = Count;
          DefIndices[AliasReg] = ~0u;
        }
      }
    }
  }
  // Count wraps from 0 to ~0u on the final --Count.
  assert(Count == ~0u && "Count mismatch!");

  return Changed;
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  if (SuccSU->NumPredsLeft == 0) {
    PendingQueue.push_back(SuccSU);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);

  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue.  If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        // Swap-and-pop removal; re-examine the moved element at index i.
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    // If there are no instructions available, don't try to issue anything, and
    // don't advance the hazard recognizer.
    if (AvailableQueue.empty()) {
      // Jump straight to the earliest cycle at which something becomes ready.
      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
      continue;
    }

    SUnit *FoundSUnit = 0;

    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        FoundSUnit = CurSUnit;
        break;
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);

      // If this is a pseudo-op node, we don't want to increment the current
      // cycle.
      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
        ++CurCycle;
    } else if (!HasNoopHazards) {
      // Otherwise, we have a pipeline stall, but no other problem, just advance
      // the current cycle and try again.
      DOUT << "*** Advancing cycle, no work to do\n";
      HazardRec->AdvanceCycle();
      ++NumStalls;
      ++CurCycle;
    } else {
      // Otherwise, we have no instructions to issue and we have instructions
      // that will fault if we don't do this right. This is the case for
      // processors without pipeline interlocks and other cases.
      DOUT << "*** Emitting noop\n";
      HazardRec->EmitNoop();
      Sequence.push_back(0);   // NULL here means noop
      ++NumNoops;
      ++CurCycle;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createPostRAScheduler() {
  return new PostRAScheduler();
}