PostRASchedulerList.cpp revision 8bff4af61219031345e7dae0c1840315e6bfab7f
//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "post-RA-sched"
#include "ExactHazardRecognizer.h"
#include "SimpleHazardRecognizer.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include <map>
#include <set>
using namespace llvm;

STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");

// Post-RA scheduling is enabled with
// TargetSubtarget.enablePostRAScheduler(). This flag can be used to
// override the target's setting.
static cl::opt<bool>
EnablePostRAScheduler("post-RA-scheduler",
                      cl::desc("Enable scheduling after register allocation"),
                      cl::init(false), cl::Hidden);
static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies"),
                      cl::init(true), cl::Hidden);
static cl::opt<bool>
EnablePostRAHazardAvoidance("avoid-hazards",
                      cl::desc("Enable exact hazard avoidance"),
                      cl::init(true), cl::Hidden);

// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
static cl::opt<int>
DebugDiv("postra-sched-debugdiv",
         cl::desc("Debug control: divisor for selecting MBBs to schedule"),
         cl::init(0), cl::Hidden);
static cl::opt<int>
DebugMod("postra-sched-debugmod",
         cl::desc("Debug control: remainder for selecting MBBs to schedule"),
         cl::init(0), cl::Hidden);

namespace {
  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(&ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Post RA top-down list latency scheduler";
    }

    bool runOnMachineFunction(MachineFunction &Fn);
  };
  char PostRAScheduler::ID = 0;

  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands
    /// have been issued, but their results are not ready yet (due to the
    /// latency of the operation). Once the operands become available, the
    /// instruction is added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// Topo - A topological ordering for SUnits.
    ScheduleDAGTopologicalSort Topo;

    /// AllocatableSet - The set of allocatable registers.
    /// We'll be ignoring anti-dependencies on non-allocatable registers,
    /// because they may not be safe to break.
    const BitVector AllocatableSet;

    /// HazardRec - The hazard recognizer to use.
    ScheduleHazardRecognizer *HazardRec;

    /// Classes - For live regs that are only used in one register class in a
    /// live range, the register class. If the register is not live, the
    /// corresponding value is null. If the register is live but used in
    /// multiple register classes, the corresponding value is -1 casted to a
    /// pointer.
    const TargetRegisterClass *
      Classes[TargetRegisterInfo::FirstVirtualRegister];

    /// RegRefs - Map registers to all their references within a live range.
    std::multimap<unsigned, MachineOperand *> RegRefs;

    /// KillIndices - The index of the most recent kill (proceeding
    /// bottom-up), or ~0u if the register is not live.
    unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];

    /// DefIndices - The index of the most recent complete def (proceeding
    /// bottom up), or ~0u if the register is live.
    unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];

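    // Illustration of this encoding (derived from the scan in
    // ScanInstruction): exactly one of KillIndices[Reg] and DefIndices[Reg]
    // is ~0u at any time. Walking a region bottom-up, with instruction
    // indices increasing toward the bottom:
    //
    //   [3]  R = ...   // after scanning: DefIndices[R] = 3,
    //                  //                 KillIndices[R] = ~0u (dead above)
    //   [5]  ... = R   // after scanning: KillIndices[R] = 5,
    //                  //                 DefIndices[R] = ~0u (live above)
    //
    // Between the two, R is considered live, with its most recent kill at
    // index 5.
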
    /// KeepRegs - A set of registers which are live and cannot be changed to
    /// break anti-dependencies.
    SmallSet<unsigned, 4> KeepRegs;

  public:
    SchedulePostRATDList(MachineFunction &MF,
                         const MachineLoopInfo &MLI,
                         const MachineDominatorTree &MDT,
                         ScheduleHazardRecognizer *HR)
      : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
        AllocatableSet(TRI->getAllocatableSet(MF)),
        HazardRec(HR) {}

    ~SchedulePostRATDList() {
      delete HazardRec;
    }

    /// StartBlock - Initialize register live-range state for scheduling in
    /// this block.
    ///
    void StartBlock(MachineBasicBlock *BB);

    /// Schedule - Schedule the instruction range using list scheduling.
    ///
    void Schedule();

    /// FixupKills - Fix register kill flags that have been made
    /// invalid due to scheduling.
    ///
    void FixupKills(MachineBasicBlock *MBB);

    /// Observe - Update liveness information to account for the current
    /// instruction, which will not be scheduled.
    ///
    void Observe(MachineInstr *MI, unsigned Count);

    /// FinishBlock - Clean up register live-range state.
    ///
    void FinishBlock();

  private:
    void PrescanInstruction(MachineInstr *MI);
    void ScanInstruction(MachineInstr *MI, unsigned Count);
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ReleaseSuccessors(SUnit *SU);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();
    bool BreakAntiDependencies();
    unsigned findSuitableFreeRegister(unsigned AntiDepReg,
                                      unsigned LastNewReg,
                                      const TargetRegisterClass *);
    void StartBlockForKills(MachineBasicBlock *BB);

    // ToggleKillFlag - Toggle a register operand kill flag. Other
    // adjustments may be made to the instruction if necessary. Return
    // true if the operand has been deleted, false if not.
    bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
  };
}

/// isSchedulingBoundary - Test if the given instruction should be
/// considered a scheduling boundary. This primarily includes labels
/// and terminators.
///
static bool isSchedulingBoundary(const MachineInstr *MI,
                                 const MachineFunction &MF) {
  // Terminators and labels can't be scheduled around.
  if (MI->getDesc().isTerminator() || MI->isLabel())
    return true;

  // Don't attempt to schedule around any instruction that modifies
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
  if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
    return true;

  return false;
}

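// For example (illustrative only), in a block such as
//
//   %r0 = LOAD ...
//   %r1 = ADD %r0, ...
//   EH_LABEL
//   %r2 = MUL %r1, ...
//   JMP ...
//
// the label and the terminator are boundaries: the scheduler is run
// separately on [LOAD, ADD] and on [MUL], and the boundary instructions
// themselves are never moved.
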
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  // Check for explicit enable/disable of post-ra scheduling.
  if (EnablePostRAScheduler.getPosition() > 0) {
    if (!EnablePostRAScheduler)
      return true;
  } else {
    // Check that post-RA scheduling is enabled for this function.
    const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
    if (!ST.enablePostRAScheduler())
      return true;
  }

  DEBUG(errs() << "PostRAScheduler\n");

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
  ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
    (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
    (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();

  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
#ifndef NDEBUG
    // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
    if (DebugDiv > 0) {
      static int bbcnt = 0;
      if (bbcnt++ % DebugDiv != DebugMod)
        continue;
      errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr()
             << ":MBB ID#" << MBB->getNumber() << " ***\n";
    }
#endif

    // Initialize register live-range state for scheduling in this block.
    Scheduler.StartBlock(MBB);

    // Schedule each sequence of instructions not interrupted by a label
    // or anything else that effectively needs to shut down scheduling.
    MachineBasicBlock::iterator Current = MBB->end();
    unsigned Count = MBB->size(), CurrentCount = Count;
    for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
      MachineInstr *MI = prior(I);
      if (isSchedulingBoundary(MI, Fn)) {
        Scheduler.Run(MBB, I, Current, CurrentCount);
        Scheduler.EmitSchedule(0);
        Current = MI;
        CurrentCount = Count - 1;
        Scheduler.Observe(MI, CurrentCount);
      }
      I = MI;
      --Count;
    }
    assert(Count == 0 && "Instruction count mismatch!");
    assert((MBB->begin() == Current || CurrentCount != 0) &&
           "Instruction count mismatch!");
    Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
    Scheduler.EmitSchedule(0);

    // Clean up register live-range state.
    Scheduler.FinishBlock();

    // Update register kills.
    Scheduler.FixupKills(MBB);
  }

  return true;
}

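// A note on the region bookkeeping in runOnMachineFunction: Current always
// points one past the end of the region being accumulated, and CurrentCount
// is the corresponding instruction index. When a boundary is found at index
// Count-1, the region [I, Current) is scheduled and emitted, Current and
// CurrentCount are reset to the boundary itself, and Observe() folds the
// unscheduled boundary instruction into the liveness state before the walk
// continues upward.
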
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
  // Call the superclass.
  ScheduleDAGInstrs::StartBlock(BB);

  // Reset the hazard recognizer.
  HazardRec->Reset();

  // Clear out the register class data.
  std::fill(Classes, array_endof(Classes),
            static_cast<const TargetRegisterClass *>(0));

  // Initialize the indices to indicate that no registers are live.
  std::fill(KillIndices, array_endof(KillIndices), ~0u);
  std::fill(DefIndices, array_endof(DefIndices), BB->size());

  // Clear the "do not change" set.
  KeepRegs.clear();

  bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());

  // Determine the live-out physregs for this block.
  if (IsReturnBlock) {
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = ~0u;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = ~0u;
      }
    }
  } else {
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = ~0u;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = ~0u;
        }
      }
  }

  // Mark live-out callee-saved registers. In a return block this is
  // all callee-saved registers. In non-return blocks this is any
  // callee-saved register that is not saved in the prolog.
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  BitVector Pristine = MFI->getPristineRegs(BB);
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    if (!IsReturnBlock && !Pristine.test(Reg)) continue;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = ~0u;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = ~0u;
    }
  }
}

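// Note that StartBlock enters each live-out register with
// KillIndices[Reg] == BB->size(), i.e. a kill one past the last instruction,
// and with Classes[Reg] == -1, so the anti-dependence breaker will never
// rename a live-out register.
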
/// Schedule - Schedule the instruction range using list scheduling.
///
void SchedulePostRATDList::Schedule() {
  DEBUG(errs() << "********** List Scheduling **********\n");

  // Build the scheduling graph.
  BuildSchedGraph();

  if (EnableAntiDepBreaking) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      SUnits.clear();
      EntrySU = SUnit();
      ExitSU = SUnit();
      BuildSchedGraph();
    }
  }

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}

/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
  assert(Count < InsertPosIndex && "Instruction index out of expected range!");

  // Any register which was defined within the previous scheduling region
  // may have been rescheduled and its lifetime may overlap with registers
  // in ways not reflected in our current liveness state. For each such
  // register, adjust the liveness state to be conservatively correct.
  for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
    if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
      assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
      // Mark this register as non-renamable.
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      // Move the def index to the end of the previous region, to reflect
      // that the def could theoretically have been scheduled at the end.
      DefIndices[Reg] = InsertPosIndex;
    }

  PrescanInstruction(MI);
  ScanInstruction(MI, Count);
}

/// FinishBlock - Clean up register live-range state.
///
void SchedulePostRATDList::FinishBlock() {
  RegRefs.clear();

  // Call the superclass.
  ScheduleDAGInstrs::FinishBlock();
}

/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}

void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
  // Scan the register operands for this instruction and update
  // Classes and RegRefs.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    const TargetRegisterClass *NewRC = 0;

    if (i < MI->getDesc().getNumOperands())
      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);

    // For now, only allow the register to be changed if its register
    // class is consistent across all uses.
    if (!Classes[Reg] && NewRC)
      Classes[Reg] = NewRC;
    else if (!NewRC || Classes[Reg] != NewRC)
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

    // Now check for aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      // If an alias of the reg is used during the live range, give up.
      // Note that this allows us to skip checking if AntiDepReg
      // overlaps with any of the aliases, among other things.
      unsigned AliasReg = *Alias;
      if (Classes[AliasReg]) {
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }

    // If we're still willing to consider this register, note the reference.
    if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
      RegRefs.insert(std::make_pair(Reg, &MO));

    // It's not safe to change register allocation for source operands
    // that have special allocation requirements.
    if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
      if (KeepRegs.insert(Reg)) {
        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
             *Subreg; ++Subreg)
          KeepRegs.insert(*Subreg);
      }
    }
  }
}

void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
                                           unsigned Count) {
  // Update liveness.
  // Proceeding upwards, registers that are def'd but not used in this
  // instruction are now dead.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    if (!MO.isDef()) continue;
    // Ignore two-addr defs.
    if (MI->isRegTiedToUseOperand(i)) continue;

    DefIndices[Reg] = Count;
    KillIndices[Reg] = ~0u;
    assert(((KillIndices[Reg] == ~0u) !=
            (DefIndices[Reg] == ~0u)) &&
           "Kill and Def maps aren't consistent for Reg!");
    KeepRegs.erase(Reg);
    Classes[Reg] = 0;
    RegRefs.erase(Reg);
    // Repeat, for all subregs.
    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
         *Subreg; ++Subreg) {
      unsigned SubregReg = *Subreg;
      DefIndices[SubregReg] = Count;
      KillIndices[SubregReg] = ~0u;
      KeepRegs.erase(SubregReg);
      Classes[SubregReg] = 0;
      RegRefs.erase(SubregReg);
    }
    // Conservatively mark super-registers as unusable.
    for (const unsigned *Super = TRI->getSuperRegisters(Reg);
         *Super; ++Super) {
      unsigned SuperReg = *Super;
      Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
    }
  }
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg()) continue;
    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;
    if (!MO.isUse()) continue;

    const TargetRegisterClass *NewRC = 0;
    if (i < MI->getDesc().getNumOperands())
      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);

    // For now, only allow the register to be changed if its register
    // class is consistent across all uses.
    if (!Classes[Reg] && NewRC)
      Classes[Reg] = NewRC;
    else if (!NewRC || Classes[Reg] != NewRC)
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

    RegRefs.insert(std::make_pair(Reg, &MO));

    // If it wasn't previously live, then this use is a kill.
    if (KillIndices[Reg] == ~0u) {
      KillIndices[Reg] = Count;
      DefIndices[Reg] = ~0u;
      assert(((KillIndices[Reg] == ~0u) !=
              (DefIndices[Reg] == ~0u)) &&
             "Kill and Def maps aren't consistent for Reg!");
    }
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      if (KillIndices[AliasReg] == ~0u) {
        KillIndices[AliasReg] = Count;
        DefIndices[AliasReg] = ~0u;
      }
    }
  }
}

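// A note on the def scan above: a def of Reg completely redefines each of
// Reg's sub-registers as well, so their tracking state is reset outright,
// while a super-register is only partially clobbered and is therefore just
// marked non-renamable (Classes[SuperReg] = -1) rather than reset.
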
unsigned
SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg,
                                               unsigned LastNewReg,
                                               const TargetRegisterClass *RC) {
  for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
       RE = RC->allocation_order_end(MF); R != RE; ++R) {
    unsigned NewReg = *R;
    // Don't replace a register with itself.
    if (NewReg == AntiDepReg) continue;
    // Don't replace a register with one that was recently used to repair
    // an anti-dependence with this AntiDepReg, because that would
    // re-introduce that anti-dependence.
    if (NewReg == LastNewReg) continue;
    // If NewReg is dead and NewReg's most recent def is not before
    // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
    assert(((KillIndices[AntiDepReg] == ~0u) !=
            (DefIndices[AntiDepReg] == ~0u)) &&
           "Kill and Def maps aren't consistent for AntiDepReg!");
    assert(((KillIndices[NewReg] == ~0u) !=
            (DefIndices[NewReg] == ~0u)) &&
           "Kill and Def maps aren't consistent for NewReg!");
    if (KillIndices[NewReg] != ~0u ||
        Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
        KillIndices[AntiDepReg] > DefIndices[NewReg])
      continue;
    return NewReg;
  }

  // No registers are free and available!
  return 0;
}

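// A worked example of the final test above: suppose AntiDepReg's live range
// ends with a kill at index 5 (KillIndices[AntiDepReg] == 5) and NewReg is
// dead with its next def at index 7 (DefIndices[NewReg] == 7). Then
// KillIndices[AntiDepReg] > DefIndices[NewReg] is false, so renaming
// AntiDepReg's live range to NewReg cannot overlap NewReg's next live
// range, and NewReg is returned.
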
/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (SUnits.empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

  DEBUG(errs() << "Critical path has total latency "
        << (Max->getDepth() + Max->Latency) << "\n");

  // Track progress along the critical path through the SUnit graph as we
  // walk the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free. This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break. To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = InsertPosIndex - 1;
  for (MachineBasicBlock::iterator I = InsertPos, E = Begin;
       I != E; --Count) {
    MachineInstr *MI = --I;

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the
    // anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          if (!AllocatableSet.test(AntiDepReg))
            // Don't break anti-dependencies on non-allocatable registers.
            AntiDepReg = 0;
          else if (KeepRegs.count(AntiDepReg))
            // Don't break anti-dependencies if a use down below requires
            // this exact register.
            AntiDepReg = 0;
          else {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    PrescanInstruction(MI);

    if (MI->getDesc().hasExtraDefRegAllocReq())
      // If this instruction's defs have special allocation requirements,
      // don't break this anti-dependency.
      AntiDepReg = 0;
    else if (AntiDepReg) {
      // If this instruction has a use of AntiDepReg, breaking it
      // is invalid.
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        MachineOperand &MO = MI->getOperand(i);
        if (!MO.isReg()) continue;
        unsigned Reg = MO.getReg();
        if (Reg == 0) continue;
        if (MO.isUse() && AntiDepReg == Reg) {
          AntiDepReg = 0;
          break;
        }
      }
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
                                                     LastNewReg[AntiDepReg],
                                                     RC)) {
        DEBUG(errs() << "Breaking anti-dependence edge on "
              << TRI->getName(AntiDepReg)
              << " with " << RegRefs.count(AntiDepReg) << " references"
              << " using " << TRI->getName(NewReg) << "!\n");

        // Update the references to the old register to refer to the new
        // register.
        std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                  std::multimap<unsigned, MachineOperand *>::iterator>
           Range = RegRefs.equal_range(AntiDepReg);
        for (std::multimap<unsigned, MachineOperand *>::iterator
             Q = Range.first, QE = Range.second; Q != QE; ++Q)
          Q->second->setReg(NewReg);

        // We just went back in time and modified history; the
        // liveness information for the anti-dependence reg is now
        // inconsistent. Set the state as if it were dead.
        Classes[NewReg] = Classes[AntiDepReg];
        DefIndices[NewReg] = DefIndices[AntiDepReg];
        KillIndices[NewReg] = KillIndices[AntiDepReg];
        assert(((KillIndices[NewReg] == ~0u) !=
                (DefIndices[NewReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for NewReg!");

        Classes[AntiDepReg] = 0;
        DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
        KillIndices[AntiDepReg] = ~0u;
        assert(((KillIndices[AntiDepReg] == ~0u) !=
                (DefIndices[AntiDepReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");

        RegRefs.erase(AntiDepReg);
        Changed = true;
        LastNewReg[AntiDepReg] = NewReg;
      }
    }

    ScanInstruction(MI, Count);
  }

  return Changed;
}

/// StartBlockForKills - Initialize register live-range state for updating
/// kills.
void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
  // Initialize the indices to indicate that no registers are live.
  std::fill(KillIndices, array_endof(KillIndices), ~0u);

  // Determine the live-out physregs for this block.
  if (!BB->empty() && BB->back().getDesc().isReturn()) {
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      KillIndices[Reg] = BB->size();
      // Repeat, for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        KillIndices[*Subreg] = BB->size();
      }
    }
  } else {
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI) {
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        KillIndices[Reg] = BB->size();
        // Repeat, for all subregs.
        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
             *Subreg; ++Subreg) {
          KillIndices[*Subreg] = BB->size();
        }
      }
    }
  }
}

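/// ToggleKillFlag - Toggle a register operand kill flag. If the operand was
/// not marked kill, mark it. If it was marked kill but the register (or one
/// of its sub-registers) is still live, clear the flag; an imp-def operand
/// is added for each live sub-register so their liveness stays explicit.
/// Returns true if the operand has been deleted, false if not.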
bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
                                          MachineOperand &MO) {
  // Setting kill flag...
  if (!MO.isKill()) {
    MO.setIsKill(true);
    return false;
  }

  // If MO itself is live, clear the kill flag...
  if (KillIndices[MO.getReg()] != ~0u) {
    MO.setIsKill(false);
    return false;
  }

  // If any subreg of MO is live, then create an imp-def for that
  // subreg and clear the kill flag on MO itself; if all subregs are
  // dead, keep MO marked as killed.
  MO.setIsKill(false);
  bool AllDead = true;
  const unsigned SuperReg = MO.getReg();
  for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
       *Subreg; ++Subreg) {
    if (KillIndices[*Subreg] != ~0u) {
      MI->addOperand(MachineOperand::CreateReg(*Subreg,
                                               true  /*IsDef*/,
                                               true  /*IsImp*/,
                                               false /*IsKill*/,
                                               false /*IsDead*/));
      AllDead = false;
    }
  }

  if (AllDead)
    MO.setIsKill(true);
  return false;
}

/// FixupKills - Fix the register kill flags; they may have been made
/// incorrect by instruction reordering.
///
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
  DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n');

  std::set<unsigned> killedRegs;
  BitVector ReservedRegs = TRI->getReservedRegs(MF);

  StartBlockForKills(MBB);

  // Examine the block from end to start...
  unsigned Count = MBB->size();
  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
       I != E; --Count) {
    MachineInstr *MI = --I;

    // Update liveness. Registers that are def'd but not used in this
    // instruction are now dead. Mark the register and all of its subregs
    // as dead, since they are completely defined.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegTiedToUseOperand(i)) continue;

      KillIndices[Reg] = ~0u;

      // Repeat for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        KillIndices[*Subreg] = ~0u;
      }
    }

    // Examine all used registers and set/clear kill flag. When a
    // register is used multiple times we only set the kill flag on
    // the first use.
    killedRegs.clear();
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isUse()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;

      bool kill = false;
      if (killedRegs.find(Reg) == killedRegs.end()) {
        kill = true;
        // A register is not killed if any subregs are live...
        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
             *Subreg; ++Subreg) {
          if (KillIndices[*Subreg] != ~0u) {
            kill = false;
            break;
          }
        }

        // If no subreg is live, then the register is killed if it became
        // live in this instruction (proceeding bottom-up).
        if (kill)
          kill = (KillIndices[Reg] == ~0u);
      }

      if (MO.isKill() != kill) {
        bool removed = ToggleKillFlag(MI, MO);
        if (removed) {
          DEBUG(errs() << "Fixed <removed> in ");
        } else {
          DEBUG(errs() << "Fixed " << MO << " in ");
        }
        DEBUG(MI->dump());
      }

      killedRegs.insert(Reg);
    }

    // Mark any used register (that is not using undef) and subregs as
    // now live...
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;

      KillIndices[Reg] = Count;

      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        KillIndices[*Subreg] = Count;
      }
    }
  }
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    errs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    errs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    PendingQueue.push_back(SuccSU);
}

/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}

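// In the main loop below, a cycle in which nothing can be scheduled is
// resolved in one of two ways: if the available instructions are merely
// waiting out latencies, the hazard recognizer's cycle is advanced (a
// stall, counted in NumStalls); if the target reported a NoopHazard, an
// explicit noop must be emitted instead (counted in NumNoops), as is
// required on processors without pipeline interlocks.
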
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // In any cycle where we can't schedule any instructions, we must
  // stall or emit a noop, depending on the target.
  bool CycleHasInsts = false;

  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Otherwise schedule the node.
  std::vector<SUnit*> NotReady;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue. If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    DEBUG(errs() << "\n*** Examining Available\n";
          LatencyPriorityQueue q = AvailableQueue;
          while (!q.empty()) {
            SUnit *su = q.pop();
            errs() << "Height " << su->getHeight() << ": ";
            su->dump(this);
          });

    SUnit *FoundSUnit = 0;

    bool HasNoopHazards = false;
    while (!AvailableQueue.empty()) {
      SUnit *CurSUnit = AvailableQueue.pop();

      ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(CurSUnit);
      if (HT == ScheduleHazardRecognizer::NoHazard) {
        FoundSUnit = CurSUnit;
        break;
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue.push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundSUnit);
      CycleHasInsts = true;

      // If we are using the target-specific hazards, then don't
      // advance the cycle time just because we schedule a node. If
      // the target allows it we can schedule multiple nodes in the
      // same cycle.
      if (!EnablePostRAHazardAvoidance) {
        if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
          ++CurCycle;
      }
    } else {
      if (CycleHasInsts) {
        DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
      } else if (!HasNoopHazards) {
        // Otherwise, we have a pipeline stall, but no other problem,
        // just advance the current cycle and try again.
        DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
        HazardRec->AdvanceCycle();
        ++NumStalls;
      } else {
        // Otherwise, we have no instructions to issue and we have instructions
        // that will fault if we don't do this right. This is the case for
        // processors without pipeline interlocks and other cases.
        DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
        HazardRec->EmitNoop();
        Sequence.push_back(0);   // NULL here means noop
        ++NumNoops;
      }

      ++CurCycle;
      CycleHasInsts = false;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createPostRAScheduler() {
  return new PostRAScheduler();
}

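// Usage sketch (illustrative; the exact pass-manager wiring depends on the
// target setup): a codegen pipeline adds the pass via the factory above,
// e.g.
//
//   PM.add(createPostRAScheduler());
//
// and the pass then runs only when -post-RA-scheduler is passed or the
// subtarget's enablePostRAScheduler() hook returns true.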