ScheduleDAGRRList.cpp revision 83bd3e6df5274f42bbae9f5f611b373ac61c945c
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The LLVM Compiler Infrastructure 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This file is distributed under the University of Illinois Open Source 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// License. See LICENSE.TXT for details. 72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===// 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This implements bottom-up and top-down register pressure reduction list 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// schedulers, using standard algorithms. The basic approach uses a priority 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// queue of available nodes to schedule. One at a time, nodes are taken from 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the priority queue (thus in priority order), checked for legality to 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// schedule, and emitted if legal. 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===// 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DEBUG_TYPE "pre-RA-sched" 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "ScheduleDAGSDNodes.h" 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/InlineAsm.h" 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/CodeGen/SchedulerRegistry.h" 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/CodeGen/SelectionDAGISel.h" 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Target/TargetRegisterInfo.h" 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Target/TargetData.h" 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Target/TargetMachine.h" 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Target/TargetInstrInfo.h" 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Target/TargetLowering.h" 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/ADT/SmallSet.h" 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/ADT/Statistic.h" 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/ADT/STLExtras.h" 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/CommandLine.h" 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/Debug.h" 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/ErrorHandling.h" 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/raw_ostream.h" 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <climits> 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using namespace llvm; 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static cl::opt<bool> RegPressureAware("reg-pressure-aware-sched", 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cl::init(false), cl::Hidden); 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)STATISTIC(NumUnfolds, "Number of nodes unfolded"); 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)STATISTIC(NumDups, "Number of duplicated nodes"); 442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)STATISTIC(NumPRCopies, "Number of physical register copies"); 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static RegisterScheduler 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) burrListDAGScheduler("list-burr", 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Bottom-up register reduction list scheduling", 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) createBURRListDAGScheduler); 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static RegisterScheduler 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) tdrListrDAGScheduler("list-tdrr", 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Top-down register reduction list scheduling", 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) createTDRRListDAGScheduler); 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static RegisterScheduler 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sourceListDAGScheduler("source", 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Similar to list-burr but schedules in source " 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "order when possible", 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) createSourceListDAGScheduler); 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static RegisterScheduler 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) hybridListDAGScheduler("list-hybrid", 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Bottom-up rr list scheduling which avoid stalls for " 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "long latency instructions", 642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) createHybridListDAGScheduler); 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===// 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// ScheduleDAGRRList - The actual register reduction list scheduler 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// implementation. This supports both top-down and bottom-up scheduling. 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class ScheduleDAGRRList : public ScheduleDAGSDNodes { 722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)private: 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// it is top-down. 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool isBottomUp; 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// NeedLatency - True if the scheduler will make use of latency information. 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool NeedLatency; 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// AvailableQueue - The priority queue to use for the available SUnits. 822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SchedulingPriorityQueue *AvailableQueue; 832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// LiveRegDefs - A set of physical registers and their definition 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// that are "live". These nodes must be scheduled before any other nodes that 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// modifies the registers can be scheduled. 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned NumLiveRegs; 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<SUnit*> LiveRegDefs; 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<unsigned> LiveRegCycles; 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// Topo - A topological ordering for SUnits which permits fast IsReachable 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// and similar queries. 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ScheduleDAGTopologicalSort Topo; 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)public: 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ScheduleDAGRRList(MachineFunction &mf, 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool isbottomup, bool needlatency, 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SchedulingPriorityQueue *availqueue) 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), 100 AvailableQueue(availqueue), Topo(SUnits) { 101 } 102 103 ~ScheduleDAGRRList() { 104 delete AvailableQueue; 105 } 106 107 void Schedule(); 108 109 /// IsReachable - Checks if SU is reachable from TargetSU. 110 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 111 return Topo.IsReachable(SU, TargetSU); 112 } 113 114 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 115 /// create a cycle. 116 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 117 return Topo.WillCreateCycle(SU, TargetSU); 118 } 119 120 /// AddPred - adds a predecessor edge to SUnit SU. 121 /// This returns true if this is a new predecessor. 122 /// Updates the topological ordering if required. 123 void AddPred(SUnit *SU, const SDep &D) { 124 Topo.AddPred(SU, D.getSUnit()); 125 SU->addPred(D); 126 } 127 128 /// RemovePred - removes a predecessor edge from SUnit SU. 129 /// This returns true if an edge was removed. 130 /// Updates the topological ordering if required. 131 void RemovePred(SUnit *SU, const SDep &D) { 132 Topo.RemovePred(SU, D.getSUnit()); 133 SU->removePred(D); 134 } 135 136private: 137 void ReleasePred(SUnit *SU, const SDep *PredEdge); 138 void ReleasePredecessors(SUnit *SU, unsigned CurCycle); 139 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); 140 void ReleaseSuccessors(SUnit *SU); 141 void CapturePred(SDep *PredEdge); 142 void ScheduleNodeBottomUp(SUnit*, unsigned); 143 void ScheduleNodeTopDown(SUnit*, unsigned); 144 void UnscheduleNodeBottomUp(SUnit*); 145 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 146 SUnit *CopyAndMoveSuccessors(SUnit*); 147 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 148 const TargetRegisterClass*, 149 const TargetRegisterClass*, 150 SmallVector<SUnit*, 2>&); 151 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 152 void ListScheduleTopDown(); 153 void ListScheduleBottomUp(); 154 155 156 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 157 /// Updates the topological ordering if required. 158 SUnit *CreateNewSUnit(SDNode *N) { 159 unsigned NumSUnits = SUnits.size(); 160 SUnit *NewNode = NewSUnit(N); 161 // Update the topological ordering. 162 if (NewNode->NodeNum >= NumSUnits) 163 Topo.InitDAGTopologicalSorting(); 164 return NewNode; 165 } 166 167 /// CreateClone - Creates a new SUnit from an existing one. 168 /// Updates the topological ordering if required. 169 SUnit *CreateClone(SUnit *N) { 170 unsigned NumSUnits = SUnits.size(); 171 SUnit *NewNode = Clone(N); 172 // Update the topological ordering. 173 if (NewNode->NodeNum >= NumSUnits) 174 Topo.InitDAGTopologicalSorting(); 175 return NewNode; 176 } 177 178 /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't 179 /// need actual latency information but the hybrid scheduler does. 180 bool ForceUnitLatencies() const { 181 return !NeedLatency; 182 } 183}; 184} // end anonymous namespace 185 186 187/// Schedule - Schedule the DAG using list scheduling. 188void ScheduleDAGRRList::Schedule() { 189 DEBUG(dbgs() 190 << "********** List Scheduling BB#" << BB->getNumber() 191 << " **********\n"); 192 193 NumLiveRegs = 0; 194 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 195 LiveRegCycles.resize(TRI->getNumRegs(), 0); 196 197 // Build the scheduling graph. 198 BuildSchedGraph(NULL); 199 200 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 201 SUnits[su].dumpAll(this)); 202 Topo.InitDAGTopologicalSorting(); 203 204 AvailableQueue->initNodes(SUnits); 205 206 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 207 if (isBottomUp) 208 ListScheduleBottomUp(); 209 else 210 ListScheduleTopDown(); 211 212 AvailableQueue->releaseState(); 213} 214 215//===----------------------------------------------------------------------===// 216// Bottom-Up Scheduling 217//===----------------------------------------------------------------------===// 218 219/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 220/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 221void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 222 SUnit *PredSU = PredEdge->getSUnit(); 223 224#ifndef NDEBUG 225 if (PredSU->NumSuccsLeft == 0) { 226 dbgs() << "*** Scheduling failed! ***\n"; 227 PredSU->dump(this); 228 dbgs() << " has been released too many times!\n"; 229 llvm_unreachable(0); 230 } 231#endif 232 --PredSU->NumSuccsLeft; 233 234 if (!ForceUnitLatencies()) { 235 // Updating predecessor's height. This is now the cycle when the 236 // predecessor can be scheduled without causing a pipeline stall. 237 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); 238 } 239 240 // If all the node's successors are scheduled, this node is ready 241 // to be scheduled. Ignore the special EntrySU node. 242 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 243 PredSU->isAvailable = true; 244 AvailableQueue->push(PredSU); 245 } 246} 247 248void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { 249 // Bottom up: release predecessors 250 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 251 I != E; ++I) { 252 ReleasePred(SU, &*I); 253 if (I->isAssignedRegDep()) { 254 // This is a physical register dependency and it's impossible or 255 // expensive to copy the register. Make sure nothing that can 256 // clobber the register is scheduled between the predecessor and 257 // this node. 258 if (!LiveRegDefs[I->getReg()]) { 259 ++NumLiveRegs; 260 LiveRegDefs[I->getReg()] = I->getSUnit(); 261 LiveRegCycles[I->getReg()] = CurCycle; 262 } 263 } 264 } 265} 266 267/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 268/// count of its predecessors. If a predecessor pending count is zero, add it to 269/// the Available queue. 270void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 271 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); 272 DEBUG(SU->dump(this)); 273 274#ifndef NDEBUG 275 if (CurCycle < SU->getHeight()) 276 DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); 277#endif 278 279 // FIXME: Handle noop hazard. 280 SU->setHeightToAtLeast(CurCycle); 281 Sequence.push_back(SU); 282 283 ReleasePredecessors(SU, CurCycle); 284 285 // Release all the implicit physical register defs that are live. 286 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 287 I != E; ++I) { 288 if (I->isAssignedRegDep()) { 289 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 290 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 291 assert(LiveRegDefs[I->getReg()] == SU && 292 "Physical register dependency violated?"); 293 --NumLiveRegs; 294 LiveRegDefs[I->getReg()] = NULL; 295 LiveRegCycles[I->getReg()] = 0; 296 } 297 } 298 } 299 300 SU->isScheduled = true; 301 AvailableQueue->ScheduledNode(SU); 302} 303 304/// CapturePred - This does the opposite of ReleasePred. Since SU is being 305/// unscheduled, incrcease the succ left count of its predecessors. Remove 306/// them from AvailableQueue if necessary. 307void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 308 SUnit *PredSU = PredEdge->getSUnit(); 309 if (PredSU->isAvailable) { 310 PredSU->isAvailable = false; 311 if (!PredSU->isPending) 312 AvailableQueue->remove(PredSU); 313 } 314 315 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); 316 ++PredSU->NumSuccsLeft; 317} 318 319/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 320/// its predecessor states to reflect the change. 321void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 322 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); 323 DEBUG(SU->dump(this)); 324 325 AvailableQueue->UnscheduledNode(SU); 326 327 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 328 I != E; ++I) { 329 CapturePred(&*I); 330 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){ 331 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 332 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 333 "Physical register dependency violated?"); 334 --NumLiveRegs; 335 LiveRegDefs[I->getReg()] = NULL; 336 LiveRegCycles[I->getReg()] = 0; 337 } 338 } 339 340 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 341 I != E; ++I) { 342 if (I->isAssignedRegDep()) { 343 if (!LiveRegDefs[I->getReg()]) { 344 LiveRegDefs[I->getReg()] = SU; 345 ++NumLiveRegs; 346 } 347 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 348 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 349 } 350 } 351 352 SU->setHeightDirty(); 353 SU->isScheduled = false; 354 SU->isAvailable = true; 355 AvailableQueue->push(SU); 356} 357 358/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 359/// BTCycle in order to schedule a specific node. 360void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 361 unsigned &CurCycle) { 362 SUnit *OldSU = NULL; 363 while (CurCycle > BtCycle) { 364 OldSU = Sequence.back(); 365 Sequence.pop_back(); 366 if (SU->isSucc(OldSU)) 367 // Don't try to remove SU from AvailableQueue. 368 SU->isAvailable = false; 369 UnscheduleNodeBottomUp(OldSU); 370 --CurCycle; 371 AvailableQueue->setCurCycle(CurCycle); 372 } 373 374 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 375 376 ++NumBacktracks; 377} 378 379static bool isOperandOf(const SUnit *SU, SDNode *N) { 380 for (const SDNode *SUNode = SU->getNode(); SUNode; 381 SUNode = SUNode->getFlaggedNode()) { 382 if (SUNode->isOperandOf(N)) 383 return true; 384 } 385 return false; 386} 387 388/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 389/// successors to the newly created node. 390SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 391 if (SU->getNode()->getFlaggedNode()) 392 return NULL; 393 394 SDNode *N = SU->getNode(); 395 if (!N) 396 return NULL; 397 398 SUnit *NewSU; 399 bool TryUnfold = false; 400 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 401 EVT VT = N->getValueType(i); 402 if (VT == MVT::Flag) 403 return NULL; 404 else if (VT == MVT::Other) 405 TryUnfold = true; 406 } 407 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 408 const SDValue &Op = N->getOperand(i); 409 EVT VT = Op.getNode()->getValueType(Op.getResNo()); 410 if (VT == MVT::Flag) 411 return NULL; 412 } 413 414 if (TryUnfold) { 415 SmallVector<SDNode*, 2> NewNodes; 416 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 417 return NULL; 418 419 DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); 420 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 421 422 N = NewNodes[1]; 423 SDNode *LoadNode = NewNodes[0]; 424 unsigned NumVals = N->getNumValues(); 425 unsigned OldNumVals = SU->getNode()->getNumValues(); 426 for (unsigned i = 0; i != NumVals; ++i) 427 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 428 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 429 SDValue(LoadNode, 1)); 430 431 // LoadNode may already exist. This can happen when there is another 432 // load from the same location and producing the same type of value 433 // but it has different alignment or volatileness. 434 bool isNewLoad = true; 435 SUnit *LoadSU; 436 if (LoadNode->getNodeId() != -1) { 437 LoadSU = &SUnits[LoadNode->getNodeId()]; 438 isNewLoad = false; 439 } else { 440 LoadSU = CreateNewSUnit(LoadNode); 441 LoadNode->setNodeId(LoadSU->NodeNum); 442 ComputeLatency(LoadSU); 443 } 444 445 SUnit *NewSU = CreateNewSUnit(N); 446 assert(N->getNodeId() == -1 && "Node already inserted!"); 447 N->setNodeId(NewSU->NodeNum); 448 449 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 450 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 451 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 452 NewSU->isTwoAddress = true; 453 break; 454 } 455 } 456 if (TID.isCommutable()) 457 NewSU->isCommutable = true; 458 ComputeLatency(NewSU); 459 460 // Record all the edges to and from the old SU, by category. 461 SmallVector<SDep, 4> ChainPreds; 462 SmallVector<SDep, 4> ChainSuccs; 463 SmallVector<SDep, 4> LoadPreds; 464 SmallVector<SDep, 4> NodePreds; 465 SmallVector<SDep, 4> NodeSuccs; 466 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 467 I != E; ++I) { 468 if (I->isCtrl()) 469 ChainPreds.push_back(*I); 470 else if (isOperandOf(I->getSUnit(), LoadNode)) 471 LoadPreds.push_back(*I); 472 else 473 NodePreds.push_back(*I); 474 } 475 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 476 I != E; ++I) { 477 if (I->isCtrl()) 478 ChainSuccs.push_back(*I); 479 else 480 NodeSuccs.push_back(*I); 481 } 482 483 // Now assign edges to the newly-created nodes. 484 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { 485 const SDep &Pred = ChainPreds[i]; 486 RemovePred(SU, Pred); 487 if (isNewLoad) 488 AddPred(LoadSU, Pred); 489 } 490 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 491 const SDep &Pred = LoadPreds[i]; 492 RemovePred(SU, Pred); 493 if (isNewLoad) 494 AddPred(LoadSU, Pred); 495 } 496 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 497 const SDep &Pred = NodePreds[i]; 498 RemovePred(SU, Pred); 499 AddPred(NewSU, Pred); 500 } 501 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 502 SDep D = NodeSuccs[i]; 503 SUnit *SuccDep = D.getSUnit(); 504 D.setSUnit(SU); 505 RemovePred(SuccDep, D); 506 D.setSUnit(NewSU); 507 AddPred(SuccDep, D); 508 } 509 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 510 SDep D = ChainSuccs[i]; 511 SUnit *SuccDep = D.getSUnit(); 512 D.setSUnit(SU); 513 RemovePred(SuccDep, D); 514 if (isNewLoad) { 515 D.setSUnit(LoadSU); 516 AddPred(SuccDep, D); 517 } 518 } 519 520 // Add a data dependency to reflect that NewSU reads the value defined 521 // by LoadSU. 522 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); 523 524 if (isNewLoad) 525 AvailableQueue->addNode(LoadSU); 526 AvailableQueue->addNode(NewSU); 527 528 ++NumUnfolds; 529 530 if (NewSU->NumSuccsLeft == 0) { 531 NewSU->isAvailable = true; 532 return NewSU; 533 } 534 SU = NewSU; 535 } 536 537 DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); 538 NewSU = CreateClone(SU); 539 540 // New SUnit has the exact same predecessors. 541 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 542 I != E; ++I) 543 if (!I->isArtificial()) 544 AddPred(NewSU, *I); 545 546 // Only copy scheduled successors. Cut them from old node's successor 547 // list and move them over. 548 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 549 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 550 I != E; ++I) { 551 if (I->isArtificial()) 552 continue; 553 SUnit *SuccSU = I->getSUnit(); 554 if (SuccSU->isScheduled) { 555 SDep D = *I; 556 D.setSUnit(NewSU); 557 AddPred(SuccSU, D); 558 D.setSUnit(SU); 559 DelDeps.push_back(std::make_pair(SuccSU, D)); 560 } 561 } 562 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 563 RemovePred(DelDeps[i].first, DelDeps[i].second); 564 565 AvailableQueue->updateNode(SU); 566 AvailableQueue->addNode(NewSU); 567 568 ++NumDups; 569 return NewSU; 570} 571 572/// InsertCopiesAndMoveSuccs - Insert register copies and move all 573/// scheduled successors of the given SUnit to the last copy. 574void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 575 const TargetRegisterClass *DestRC, 576 const TargetRegisterClass *SrcRC, 577 SmallVector<SUnit*, 2> &Copies) { 578 SUnit *CopyFromSU = CreateNewSUnit(NULL); 579 CopyFromSU->CopySrcRC = SrcRC; 580 CopyFromSU->CopyDstRC = DestRC; 581 582 SUnit *CopyToSU = CreateNewSUnit(NULL); 583 CopyToSU->CopySrcRC = DestRC; 584 CopyToSU->CopyDstRC = SrcRC; 585 586 // Only copy scheduled successors. Cut them from old node's successor 587 // list and move them over. 588 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 589 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 590 I != E; ++I) { 591 if (I->isArtificial()) 592 continue; 593 SUnit *SuccSU = I->getSUnit(); 594 if (SuccSU->isScheduled) { 595 SDep D = *I; 596 D.setSUnit(CopyToSU); 597 AddPred(SuccSU, D); 598 DelDeps.push_back(std::make_pair(SuccSU, *I)); 599 } 600 } 601 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 602 RemovePred(DelDeps[i].first, DelDeps[i].second); 603 604 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 605 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 606 607 AvailableQueue->updateNode(SU); 608 AvailableQueue->addNode(CopyFromSU); 609 AvailableQueue->addNode(CopyToSU); 610 Copies.push_back(CopyFromSU); 611 Copies.push_back(CopyToSU); 612 613 ++NumPRCopies; 614} 615 616/// getPhysicalRegisterVT - Returns the ValueType of the physical register 617/// definition of the specified node. 618/// FIXME: Move to SelectionDAG? 619static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 620 const TargetInstrInfo *TII) { 621 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 622 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 623 unsigned NumRes = TID.getNumDefs(); 624 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 625 if (Reg == *ImpDef) 626 break; 627 ++NumRes; 628 } 629 return N->getValueType(NumRes); 630} 631 632/// CheckForLiveRegDef - Return true and update live register vector if the 633/// specified register def of the specified SUnit clobbers any "live" registers. 634static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, 635 std::vector<SUnit*> &LiveRegDefs, 636 SmallSet<unsigned, 4> &RegAdded, 637 SmallVector<unsigned, 4> &LRegs, 638 const TargetRegisterInfo *TRI) { 639 bool Added = false; 640 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { 641 if (RegAdded.insert(Reg)) { 642 LRegs.push_back(Reg); 643 Added = true; 644 } 645 } 646 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) 647 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 648 if (RegAdded.insert(*Alias)) { 649 LRegs.push_back(*Alias); 650 Added = true; 651 } 652 } 653 return Added; 654} 655 656/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 657/// scheduling of the given node to satisfy live physical register dependencies. 658/// If the specific node is the last one that's available to schedule, do 659/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 660bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 661 SmallVector<unsigned, 4> &LRegs){ 662 if (NumLiveRegs == 0) 663 return false; 664 665 SmallSet<unsigned, 4> RegAdded; 666 // If this node would clobber any "live" register, then it's not ready. 667 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 668 I != E; ++I) { 669 if (I->isAssignedRegDep()) 670 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 671 RegAdded, LRegs, TRI); 672 } 673 674 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 675 if (Node->getOpcode() == ISD::INLINEASM) { 676 // Inline asm can clobber physical defs. 677 unsigned NumOps = Node->getNumOperands(); 678 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) 679 --NumOps; // Ignore the flag operand. 680 681 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 682 unsigned Flags = 683 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 684 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); 685 686 ++i; // Skip the ID value. 687 if (InlineAsm::isRegDefKind(Flags) || 688 InlineAsm::isRegDefEarlyClobberKind(Flags)) { 689 // Check for def of register or earlyclobber register. 690 for (; NumVals; --NumVals, ++i) { 691 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 692 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 693 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 694 } 695 } else 696 i += NumVals; 697 } 698 continue; 699 } 700 701 if (!Node->isMachineOpcode()) 702 continue; 703 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 704 if (!TID.ImplicitDefs) 705 continue; 706 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) 707 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 708 } 709 return !LRegs.empty(); 710} 711 712 713/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 714/// schedulers. 715void ScheduleDAGRRList::ListScheduleBottomUp() { 716 unsigned CurCycle = 0; 717 718 // Release any predecessors of the special Exit node. 719 ReleasePredecessors(&ExitSU, CurCycle); 720 721 // Add root to Available queue. 722 if (!SUnits.empty()) { 723 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 724 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 725 RootSU->isAvailable = true; 726 AvailableQueue->push(RootSU); 727 } 728 729 // While Available queue is not empty, grab the node with the highest 730 // priority. If it is not ready put it back. Schedule the node. 731 SmallVector<SUnit*, 4> NotReady; 732 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 733 Sequence.reserve(SUnits.size()); 734 while (!AvailableQueue->empty()) { 735 bool Delayed = false; 736 LRegsMap.clear(); 737 SUnit *CurSU = AvailableQueue->pop(); 738 while (CurSU) { 739 SmallVector<unsigned, 4> LRegs; 740 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 741 break; 742 Delayed = true; 743 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 744 745 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 746 NotReady.push_back(CurSU); 747 CurSU = AvailableQueue->pop(); 748 } 749 750 // All candidates are delayed due to live physical reg dependencies. 751 // Try backtracking, code duplication, or inserting cross class copies 752 // to resolve it. 753 if (Delayed && !CurSU) { 754 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 755 SUnit *TrySU = NotReady[i]; 756 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 757 758 // Try unscheduling up to the point where it's safe to schedule 759 // this node. 760 unsigned LiveCycle = CurCycle; 761 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 762 unsigned Reg = LRegs[j]; 763 unsigned LCycle = LiveRegCycles[Reg]; 764 LiveCycle = std::min(LiveCycle, LCycle); 765 } 766 SUnit *OldSU = Sequence[LiveCycle]; 767 if (!WillCreateCycle(TrySU, OldSU)) { 768 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 769 // Force the current node to be scheduled before the node that 770 // requires the physical reg dep. 771 if (OldSU->isAvailable) { 772 OldSU->isAvailable = false; 773 AvailableQueue->remove(OldSU); 774 } 775 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 776 /*Reg=*/0, /*isNormalMemory=*/false, 777 /*isMustAlias=*/false, /*isArtificial=*/true)); 778 // If one or more successors has been unscheduled, then the current 779 // node is no longer avaialable. Schedule a successor that's now 780 // available instead. 781 if (!TrySU->isAvailable) 782 CurSU = AvailableQueue->pop(); 783 else { 784 CurSU = TrySU; 785 TrySU->isPending = false; 786 NotReady.erase(NotReady.begin()+i); 787 } 788 break; 789 } 790 } 791 792 if (!CurSU) { 793 // Can't backtrack. If it's too expensive to copy the value, then try 794 // duplicate the nodes that produces these "too expensive to copy" 795 // values to break the dependency. In case even that doesn't work, 796 // insert cross class copies. 797 // If it's not too expensive, i.e. cost != -1, issue copies. 798 SUnit *TrySU = NotReady[0]; 799 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 800 assert(LRegs.size() == 1 && "Can't handle this yet!"); 801 unsigned Reg = LRegs[0]; 802 SUnit *LRDef = LiveRegDefs[Reg]; 803 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 804 const TargetRegisterClass *RC = 805 TRI->getMinimalPhysRegClass(Reg, VT); 806 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 807 808 // If cross copy register class is null, then it must be possible copy 809 // the value directly. Do not try duplicate the def. 810 SUnit *NewDef = 0; 811 if (DestRC) 812 NewDef = CopyAndMoveSuccessors(LRDef); 813 else 814 DestRC = RC; 815 if (!NewDef) { 816 // Issue copies, these can be expensive cross register class copies. 817 SmallVector<SUnit*, 2> Copies; 818 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 819 DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum 820 << " to SU #" << Copies.front()->NodeNum << "\n"); 821 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 822 /*Reg=*/0, /*isNormalMemory=*/false, 823 /*isMustAlias=*/false, 824 /*isArtificial=*/true)); 825 NewDef = Copies.back(); 826 } 827 828 DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum 829 << " to SU #" << TrySU->NodeNum << "\n"); 830 LiveRegDefs[Reg] = NewDef; 831 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 832 /*Reg=*/0, /*isNormalMemory=*/false, 833 /*isMustAlias=*/false, 834 /*isArtificial=*/true)); 835 TrySU->isAvailable = false; 836 CurSU = NewDef; 837 } 838 839 assert(CurSU && "Unable to resolve live physical register dependencies!"); 840 } 841 842 // Add the nodes that aren't ready back onto the available list. 843 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 844 NotReady[i]->isPending = false; 845 // May no longer be available due to backtracking. 846 if (NotReady[i]->isAvailable) 847 AvailableQueue->push(NotReady[i]); 848 } 849 NotReady.clear(); 850 851 if (CurSU) 852 ScheduleNodeBottomUp(CurSU, CurCycle); 853 ++CurCycle; 854 AvailableQueue->setCurCycle(CurCycle); 855 } 856 857 // Reverse the order if it is bottom up. 858 std::reverse(Sequence.begin(), Sequence.end()); 859 860#ifndef NDEBUG 861 VerifySchedule(isBottomUp); 862#endif 863} 864 865//===----------------------------------------------------------------------===// 866// Top-Down Scheduling 867//===----------------------------------------------------------------------===// 868 869/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 870/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 871void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { 872 SUnit *SuccSU = SuccEdge->getSUnit(); 873 874#ifndef NDEBUG 875 if (SuccSU->NumPredsLeft == 0) { 876 dbgs() << "*** Scheduling failed! ***\n"; 877 SuccSU->dump(this); 878 dbgs() << " has been released too many times!\n"; 879 llvm_unreachable(0); 880 } 881#endif 882 --SuccSU->NumPredsLeft; 883 884 // If all the node's predecessors are scheduled, this node is ready 885 // to be scheduled. Ignore the special ExitSU node. 886 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { 887 SuccSU->isAvailable = true; 888 AvailableQueue->push(SuccSU); 889 } 890} 891 892void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { 893 // Top down: release successors 894 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 895 I != E; ++I) { 896 assert(!I->isAssignedRegDep() && 897 "The list-tdrr scheduler doesn't yet support physreg dependencies!"); 898 899 ReleaseSucc(SU, &*I); 900 } 901} 902 903/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 904/// count of its successors. If a successor pending count is zero, add it to 905/// the Available queue. 906void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 907 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); 908 DEBUG(SU->dump(this)); 909 910 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 911 SU->setDepthToAtLeast(CurCycle); 912 Sequence.push_back(SU); 913 914 ReleaseSuccessors(SU); 915 SU->isScheduled = true; 916 AvailableQueue->ScheduledNode(SU); 917} 918 919/// ListScheduleTopDown - The main loop of list scheduling for top-down 920/// schedulers. 921void ScheduleDAGRRList::ListScheduleTopDown() { 922 unsigned CurCycle = 0; 923 AvailableQueue->setCurCycle(CurCycle); 924 925 // Release any successors of the special Entry node. 926 ReleaseSuccessors(&EntrySU); 927 928 // All leaves to Available queue. 929 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 930 // It is available if it has no predecessors. 931 if (SUnits[i].Preds.empty()) { 932 AvailableQueue->push(&SUnits[i]); 933 SUnits[i].isAvailable = true; 934 } 935 } 936 937 // While Available queue is not empty, grab the node with the highest 938 // priority. If it is not ready put it back. Schedule the node. 939 Sequence.reserve(SUnits.size()); 940 while (!AvailableQueue->empty()) { 941 SUnit *CurSU = AvailableQueue->pop(); 942 943 if (CurSU) 944 ScheduleNodeTopDown(CurSU, CurCycle); 945 ++CurCycle; 946 AvailableQueue->setCurCycle(CurCycle); 947 } 948 949#ifndef NDEBUG 950 VerifySchedule(isBottomUp); 951#endif 952} 953 954 955//===----------------------------------------------------------------------===// 956// RegReductionPriorityQueue Implementation 957//===----------------------------------------------------------------------===// 958// 959// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 960// to reduce register pressure. 961// 962namespace { 963 template<class SF> 964 class RegReductionPriorityQueue; 965 966 /// Sorting functions for the Available queue. 967 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 968 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 969 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 970 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 971 972 bool operator()(const SUnit* left, const SUnit* right) const; 973 }; 974 975 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 976 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 977 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 978 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 979 980 bool operator()(const SUnit* left, const SUnit* right) const; 981 }; 982 983 struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 984 RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; 985 src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) 986 : SPQ(spq) {} 987 src_ls_rr_sort(const src_ls_rr_sort &RHS) 988 : SPQ(RHS.SPQ) {} 989 990 bool operator()(const SUnit* left, const SUnit* right) const; 991 }; 992 993 struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 994 RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; 995 hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) 996 : SPQ(spq) {} 997 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) 998 : SPQ(RHS.SPQ) {} 999 1000 bool operator()(const SUnit* left, const SUnit* right) const; 1001 }; 1002} // end anonymous namespace 1003 1004/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 1005/// Smaller number is the higher priority. 1006static unsigned 1007CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 1008 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 1009 if (SethiUllmanNumber != 0) 1010 return SethiUllmanNumber; 1011 1012 unsigned Extra = 0; 1013 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1014 I != E; ++I) { 1015 if (I->isCtrl()) continue; // ignore chain preds 1016 SUnit *PredSU = I->getSUnit(); 1017 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 1018 if (PredSethiUllman > SethiUllmanNumber) { 1019 SethiUllmanNumber = PredSethiUllman; 1020 Extra = 0; 1021 } else if (PredSethiUllman == SethiUllmanNumber) 1022 ++Extra; 1023 } 1024 1025 SethiUllmanNumber += Extra; 1026 1027 if (SethiUllmanNumber == 0) 1028 SethiUllmanNumber = 1; 1029 1030 return SethiUllmanNumber; 1031} 1032 1033namespace { 1034 template<class SF> 1035 class RegReductionPriorityQueue : public SchedulingPriorityQueue { 1036 std::vector<SUnit*> Queue; 1037 SF Picker; 1038 unsigned CurQueueId; 1039 bool isBottomUp; 1040 1041 protected: 1042 // SUnits - The SUnits for the current graph. 1043 std::vector<SUnit> *SUnits; 1044 1045 MachineFunction &MF; 1046 const TargetInstrInfo *TII; 1047 const TargetRegisterInfo *TRI; 1048 const TargetLowering *TLI; 1049 ScheduleDAGRRList *scheduleDAG; 1050 1051 // SethiUllmanNumbers - The SethiUllman number for each node. 1052 std::vector<unsigned> SethiUllmanNumbers; 1053 1054 /// RegPressure - Tracking current reg pressure per register class. 1055 /// 1056 std::vector<int> RegPressure; 1057 1058 /// RegLimit - Tracking the number of allocatable registers per register 1059 /// class. 1060 std::vector<int> RegLimit; 1061 1062 public: 1063 RegReductionPriorityQueue(MachineFunction &mf, 1064 bool isbottomup, 1065 const TargetInstrInfo *tii, 1066 const TargetRegisterInfo *tri, 1067 const TargetLowering *tli) 1068 : Picker(this), CurQueueId(0), isBottomUp(isbottomup), 1069 MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { 1070 unsigned NumRC = TRI->getNumRegClasses(); 1071 RegLimit.resize(NumRC); 1072 RegPressure.resize(NumRC); 1073 std::fill(RegLimit.begin(), RegLimit.end(), 0); 1074 std::fill(RegPressure.begin(), RegPressure.end(), 0); 1075 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 1076 E = TRI->regclass_end(); I != E; ++I) 1077 RegLimit[(*I)->getID()] = tri->getAllocatableSet(MF, *I).count() - 1; 1078 } 1079 1080 void initNodes(std::vector<SUnit> &sunits) { 1081 SUnits = &sunits; 1082 // Add pseudo dependency edges for two-address nodes. 1083 AddPseudoTwoAddrDeps(); 1084 // Reroute edges to nodes with multiple uses. 1085 PrescheduleNodesWithMultipleUses(); 1086 // Calculate node priorities. 1087 CalculateSethiUllmanNumbers(); 1088 } 1089 1090 void addNode(const SUnit *SU) { 1091 unsigned SUSize = SethiUllmanNumbers.size(); 1092 if (SUnits->size() > SUSize) 1093 SethiUllmanNumbers.resize(SUSize*2, 0); 1094 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1095 } 1096 1097 void updateNode(const SUnit *SU) { 1098 SethiUllmanNumbers[SU->NodeNum] = 0; 1099 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1100 } 1101 1102 void releaseState() { 1103 SUnits = 0; 1104 SethiUllmanNumbers.clear(); 1105 std::fill(RegPressure.begin(), RegPressure.end(), 0); 1106 } 1107 1108 unsigned getNodePriority(const SUnit *SU) const { 1109 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1110 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1111 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1112 // CopyToReg should be close to its uses to facilitate coalescing and 1113 // avoid spilling. 1114 return 0; 1115 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1116 Opc == TargetOpcode::SUBREG_TO_REG || 1117 Opc == TargetOpcode::INSERT_SUBREG) 1118 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 1119 // close to their uses to facilitate coalescing. 1120 return 0; 1121 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 1122 // If SU does not have a register use, i.e. it doesn't produce a value 1123 // that would be consumed (e.g. store), then it terminates a chain of 1124 // computation. Give it a large SethiUllman number so it will be 1125 // scheduled right before its predecessors that it doesn't lengthen 1126 // their live ranges. 1127 return 0xffff; 1128 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 1129 // If SU does not have a register def, schedule it close to its uses 1130 // because it does not lengthen any live ranges. 1131 return 0; 1132 return SethiUllmanNumbers[SU->NodeNum]; 1133 } 1134 1135 unsigned getNodeOrdering(const SUnit *SU) const { 1136 return scheduleDAG->DAG->GetOrdering(SU->getNode()); 1137 } 1138 1139 bool empty() const { return Queue.empty(); } 1140 1141 void push(SUnit *U) { 1142 assert(!U->NodeQueueId && "Node in the queue already"); 1143 U->NodeQueueId = ++CurQueueId; 1144 Queue.push_back(U); 1145 } 1146 1147 SUnit *pop() { 1148 if (empty()) return NULL; 1149 std::vector<SUnit *>::iterator Best = Queue.begin(); 1150 for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), 1151 E = Queue.end(); I != E; ++I) 1152 if (Picker(*Best, *I)) 1153 Best = I; 1154 SUnit *V = *Best; 1155 if (Best != prior(Queue.end())) 1156 std::swap(*Best, Queue.back()); 1157 Queue.pop_back(); 1158 V->NodeQueueId = 0; 1159 return V; 1160 } 1161 1162 void remove(SUnit *SU) { 1163 assert(!Queue.empty() && "Queue is empty!"); 1164 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1165 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), 1166 SU); 1167 if (I != prior(Queue.end())) 1168 std::swap(*I, Queue.back()); 1169 Queue.pop_back(); 1170 SU->NodeQueueId = 0; 1171 } 1172 1173 // EstimateSpills - Given a scheduling unit, estimate the number of spills 1174 // it would cause by scheduling it at the current cycle. 1175 unsigned EstimateSpills(const SUnit *SU) const { 1176 if (!TLI) 1177 return 0; 1178 1179 unsigned Spills = 0; 1180 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 1181 I != E; ++I) { 1182 if (I->isCtrl()) 1183 continue; 1184 SUnit *PredSU = I->getSUnit(); 1185 if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1) 1186 continue; 1187 const SDNode *N = PredSU->getNode(); 1188 if (!N->isMachineOpcode()) 1189 continue; 1190 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1191 for (unsigned i = 0; i != NumDefs; ++i) { 1192 EVT VT = N->getValueType(i); 1193 if (!N->hasAnyUseOfValue(i)) 1194 continue; 1195 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1196 unsigned Cost = TLI->getRepRegClassCostFor(VT); 1197 // Check if this increases register pressure of the specific register 1198 // class to the point where it would cause spills. 1199 int Excess = RegPressure[RCId] + Cost - RegLimit[RCId]; 1200 if (Excess > 0) 1201 Spills += Excess; 1202 } 1203 } 1204 1205 if (!SU->NumSuccs || !Spills) 1206 return Spills; 1207 const SDNode *N = SU->getNode(); 1208 if (!N->isMachineOpcode()) 1209 return Spills; 1210 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1211 for (unsigned i = 0; i != NumDefs; ++i) { 1212 EVT VT = N->getValueType(i); 1213 if (!N->hasAnyUseOfValue(i)) 1214 continue; 1215 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1216 unsigned Cost = TLI->getRepRegClassCostFor(VT); 1217 if (RegPressure[RCId] > RegLimit[RCId]) { 1218 int Less = RegLimit[RCId] - (RegPressure[RCId] - Cost); 1219 if (Less > 0) { 1220 if (Spills <= (unsigned)Less) 1221 return 0; 1222 Spills -= Less; 1223 } 1224 } 1225 } 1226 1227 return Spills; 1228 } 1229 1230 void OpenPredLives(SUnit *SU) { 1231 const SDNode *N = SU->getNode(); 1232 if (!N->isMachineOpcode()) 1233 return; 1234 unsigned Opc = N->getMachineOpcode(); 1235 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1236 Opc == TargetOpcode::INSERT_SUBREG || 1237 Opc == TargetOpcode::SUBREG_TO_REG || 1238 Opc == TargetOpcode::COPY_TO_REGCLASS || 1239 Opc == TargetOpcode::REG_SEQUENCE || 1240 Opc == TargetOpcode::IMPLICIT_DEF) 1241 return; 1242 1243 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1244 I != E; ++I) { 1245 if (I->isCtrl()) 1246 continue; 1247 SUnit *PredSU = I->getSUnit(); 1248 if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1) 1249 continue; 1250 const SDNode *PN = PredSU->getNode(); 1251 if (!PN->isMachineOpcode()) 1252 continue; 1253 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 1254 for (unsigned i = 0; i != NumDefs; ++i) { 1255 EVT VT = PN->getValueType(i); 1256 if (!PN->hasAnyUseOfValue(i)) 1257 continue; 1258 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1259 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1260 } 1261 } 1262 1263 if (!SU->NumSuccs) 1264 return; 1265 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1266 for (unsigned i = 0; i != NumDefs; ++i) { 1267 EVT VT = N->getValueType(i); 1268 if (!N->hasAnyUseOfValue(i)) 1269 continue; 1270 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1271 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 1272 if (RegPressure[RCId] < 0) 1273 // Register pressure tracking is imprecise. This can happen. 1274 RegPressure[RCId] = 0; 1275 } 1276 } 1277 1278 void ClosePredLives(SUnit *SU) { 1279 const SDNode *N = SU->getNode(); 1280 if (!N->isMachineOpcode()) 1281 return; 1282 unsigned Opc = N->getMachineOpcode(); 1283 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1284 Opc == TargetOpcode::INSERT_SUBREG || 1285 Opc == TargetOpcode::SUBREG_TO_REG || 1286 Opc == TargetOpcode::COPY_TO_REGCLASS || 1287 Opc == TargetOpcode::REG_SEQUENCE || 1288 Opc == TargetOpcode::IMPLICIT_DEF) 1289 return; 1290 1291 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1292 I != E; ++I) { 1293 if (I->isCtrl()) 1294 continue; 1295 SUnit *PredSU = I->getSUnit(); 1296 if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1) 1297 continue; 1298 const SDNode *PN = PredSU->getNode(); 1299 if (!PN->isMachineOpcode()) 1300 continue; 1301 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 1302 for (unsigned i = 0; i != NumDefs; ++i) { 1303 EVT VT = PN->getValueType(i); 1304 if (!PN->hasAnyUseOfValue(i)) 1305 continue; 1306 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1307 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 1308 if (RegPressure[RCId] < 0) 1309 // Register pressure tracking is imprecise. This can happen. 1310 RegPressure[RCId] = 0; 1311 } 1312 } 1313 1314 if (!SU->NumSuccs) 1315 return; 1316 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1317 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1318 EVT VT = N->getValueType(i); 1319 if (VT == MVT::Flag || VT == MVT::Other) 1320 continue; 1321 if (!N->hasAnyUseOfValue(i)) 1322 continue; 1323 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1324 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1325 } 1326 } 1327 1328 void ScheduledNode(SUnit *SU) { 1329 if (!TLI || !isBottomUp) 1330 return; 1331 OpenPredLives(SU); 1332 dumpRegPressure(); 1333 } 1334 1335 void UnscheduledNode(SUnit *SU) { 1336 if (!TLI || !isBottomUp) 1337 return; 1338 ClosePredLives(SU); 1339 dumpRegPressure(); 1340 } 1341 1342 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1343 scheduleDAG = scheduleDag; 1344 } 1345 1346 void dumpRegPressure() const { 1347 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 1348 E = TRI->regclass_end(); I != E; ++I) { 1349 const TargetRegisterClass *RC = *I; 1350 unsigned Id = RC->getID(); 1351 unsigned RP = RegPressure[Id]; 1352 if (!RP) continue; 1353 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] 1354 << '\n'); 1355 } 1356 } 1357 1358 protected: 1359 bool canClobber(const SUnit *SU, const SUnit *Op); 1360 void AddPseudoTwoAddrDeps(); 1361 void PrescheduleNodesWithMultipleUses(); 1362 void CalculateSethiUllmanNumbers(); 1363 }; 1364 1365 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1366 BURegReductionPriorityQueue; 1367 1368 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1369 TDRegReductionPriorityQueue; 1370 1371 typedef RegReductionPriorityQueue<src_ls_rr_sort> 1372 SrcRegReductionPriorityQueue; 1373 1374 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> 1375 HybridBURRPriorityQueue; 1376} 1377 1378/// closestSucc - Returns the scheduled cycle of the successor which is 1379/// closest to the current cycle. 1380static unsigned closestSucc(const SUnit *SU) { 1381 unsigned MaxHeight = 0; 1382 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1383 I != E; ++I) { 1384 if (I->isCtrl()) continue; // ignore chain succs 1385 unsigned Height = I->getSUnit()->getHeight(); 1386 // If there are bunch of CopyToRegs stacked up, they should be considered 1387 // to be at the same position. 1388 if (I->getSUnit()->getNode() && 1389 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1390 Height = closestSucc(I->getSUnit())+1; 1391 if (Height > MaxHeight) 1392 MaxHeight = Height; 1393 } 1394 return MaxHeight; 1395} 1396 1397/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1398/// for scratch registers, i.e. number of data dependencies. 1399static unsigned calcMaxScratches(const SUnit *SU) { 1400 unsigned Scratches = 0; 1401 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1402 I != E; ++I) { 1403 if (I->isCtrl()) continue; // ignore chain preds 1404 Scratches++; 1405 } 1406 return Scratches; 1407} 1408 1409template <typename RRSort> 1410static bool BURRSort(const SUnit *left, const SUnit *right, 1411 const RegReductionPriorityQueue<RRSort> *SPQ) { 1412 unsigned LPriority = SPQ->getNodePriority(left); 1413 unsigned RPriority = SPQ->getNodePriority(right); 1414 if (LPriority != RPriority) 1415 return LPriority > RPriority; 1416 1417 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1418 // e.g. 1419 // t1 = op t2, c1 1420 // t3 = op t4, c2 1421 // 1422 // and the following instructions are both ready. 1423 // t2 = op c3 1424 // t4 = op c4 1425 // 1426 // Then schedule t2 = op first. 1427 // i.e. 1428 // t4 = op c4 1429 // t2 = op c3 1430 // t1 = op t2, c1 1431 // t3 = op t4, c2 1432 // 1433 // This creates more short live intervals. 1434 unsigned LDist = closestSucc(left); 1435 unsigned RDist = closestSucc(right); 1436 if (LDist != RDist) 1437 return LDist < RDist; 1438 1439 // How many registers becomes live when the node is scheduled. 1440 unsigned LScratch = calcMaxScratches(left); 1441 unsigned RScratch = calcMaxScratches(right); 1442 if (LScratch != RScratch) 1443 return LScratch > RScratch; 1444 1445 if (left->getHeight() != right->getHeight()) 1446 return left->getHeight() > right->getHeight(); 1447 1448 if (left->getDepth() != right->getDepth()) 1449 return left->getDepth() < right->getDepth(); 1450 1451 assert(left->NodeQueueId && right->NodeQueueId && 1452 "NodeQueueId cannot be zero"); 1453 return (left->NodeQueueId > right->NodeQueueId); 1454} 1455 1456// Bottom up 1457bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1458 return BURRSort(left, right, SPQ); 1459} 1460 1461// Source order, otherwise bottom up. 1462bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1463 unsigned LOrder = SPQ->getNodeOrdering(left); 1464 unsigned ROrder = SPQ->getNodeOrdering(right); 1465 1466 // Prefer an ordering where the lower the non-zero order number, the higher 1467 // the preference. 1468 if ((LOrder || ROrder) && LOrder != ROrder) 1469 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 1470 1471 return BURRSort(left, right, SPQ); 1472} 1473 1474bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ 1475 bool LStall = left->SchedulingPref == Sched::Latency && 1476 SPQ->getCurCycle() < left->getHeight(); 1477 bool RStall = right->SchedulingPref == Sched::Latency && 1478 SPQ->getCurCycle() < right->getHeight(); 1479 // If scheduling one of the node will cause a pipeline stall, delay it. 1480 // If scheduling either one of the node will cause a pipeline stall, sort them 1481 // according to their height. 1482 // If neither will cause a pipeline stall, try to reduce register pressure. 1483 if (LStall) { 1484 if (!RStall) 1485 return true; 1486 if (left->getHeight() != right->getHeight()) 1487 return left->getHeight() > right->getHeight(); 1488 } else if (RStall) 1489 return false; 1490 1491 // If either node is scheduling for latency, sort them by height and latency 1492 // first. 1493 if (left->SchedulingPref == Sched::Latency || 1494 right->SchedulingPref == Sched::Latency) { 1495 if (left->getHeight() != right->getHeight()) 1496 return left->getHeight() > right->getHeight(); 1497 if (left->Latency != right->Latency) 1498 return left->Latency > right->Latency; 1499 } 1500 1501 return BURRSort(left, right, SPQ); 1502} 1503 1504template<class SF> 1505bool 1506RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1507 if (SU->isTwoAddress) { 1508 unsigned Opc = SU->getNode()->getMachineOpcode(); 1509 const TargetInstrDesc &TID = TII->get(Opc); 1510 unsigned NumRes = TID.getNumDefs(); 1511 unsigned NumOps = TID.getNumOperands() - NumRes; 1512 for (unsigned i = 0; i != NumOps; ++i) { 1513 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1514 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1515 if (DU->getNodeId() != -1 && 1516 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1517 return true; 1518 } 1519 } 1520 } 1521 return false; 1522} 1523 1524/// hasCopyToRegUse - Return true if SU has a value successor that is a 1525/// CopyToReg node. 1526static bool hasCopyToRegUse(const SUnit *SU) { 1527 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1528 I != E; ++I) { 1529 if (I->isCtrl()) continue; 1530 const SUnit *SuccSU = I->getSUnit(); 1531 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1532 return true; 1533 } 1534 return false; 1535} 1536 1537/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1538/// physical register defs. 1539static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1540 const TargetInstrInfo *TII, 1541 const TargetRegisterInfo *TRI) { 1542 SDNode *N = SuccSU->getNode(); 1543 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1544 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1545 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1546 for (const SDNode *SUNode = SU->getNode(); SUNode; 1547 SUNode = SUNode->getFlaggedNode()) { 1548 if (!SUNode->isMachineOpcode()) 1549 continue; 1550 const unsigned *SUImpDefs = 1551 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 1552 if (!SUImpDefs) 1553 return false; 1554 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1555 EVT VT = N->getValueType(i); 1556 if (VT == MVT::Flag || VT == MVT::Other) 1557 continue; 1558 if (!N->hasAnyUseOfValue(i)) 1559 continue; 1560 unsigned Reg = ImpDefs[i - NumDefs]; 1561 for (;*SUImpDefs; ++SUImpDefs) { 1562 unsigned SUReg = *SUImpDefs; 1563 if (TRI->regsOverlap(Reg, SUReg)) 1564 return true; 1565 } 1566 } 1567 } 1568 return false; 1569} 1570 1571/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 1572/// are not handled well by the general register pressure reduction 1573/// heuristics. When presented with code like this: 1574/// 1575/// N 1576/// / | 1577/// / | 1578/// U store 1579/// | 1580/// ... 1581/// 1582/// the heuristics tend to push the store up, but since the 1583/// operand of the store has another use (U), this would increase 1584/// the length of that other use (the U->N edge). 1585/// 1586/// This function transforms code like the above to route U's 1587/// dependence through the store when possible, like this: 1588/// 1589/// N 1590/// || 1591/// || 1592/// store 1593/// | 1594/// U 1595/// | 1596/// ... 1597/// 1598/// This results in the store being scheduled immediately 1599/// after N, which shortens the U->N live range, reducing 1600/// register pressure. 1601/// 1602template<class SF> 1603void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { 1604 // Visit all the nodes in topological order, working top-down. 1605 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1606 SUnit *SU = &(*SUnits)[i]; 1607 // For now, only look at nodes with no data successors, such as stores. 1608 // These are especially important, due to the heuristics in 1609 // getNodePriority for nodes with no data successors. 1610 if (SU->NumSuccs != 0) 1611 continue; 1612 // For now, only look at nodes with exactly one data predecessor. 1613 if (SU->NumPreds != 1) 1614 continue; 1615 // Avoid prescheduling copies to virtual registers, which don't behave 1616 // like other nodes from the perspective of scheduling heuristics. 1617 if (SDNode *N = SU->getNode()) 1618 if (N->getOpcode() == ISD::CopyToReg && 1619 TargetRegisterInfo::isVirtualRegister 1620 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1621 continue; 1622 1623 // Locate the single data predecessor. 1624 SUnit *PredSU = 0; 1625 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 1626 EE = SU->Preds.end(); II != EE; ++II) 1627 if (!II->isCtrl()) { 1628 PredSU = II->getSUnit(); 1629 break; 1630 } 1631 assert(PredSU); 1632 1633 // Don't rewrite edges that carry physregs, because that requires additional 1634 // support infrastructure. 1635 if (PredSU->hasPhysRegDefs) 1636 continue; 1637 // Short-circuit the case where SU is PredSU's only data successor. 1638 if (PredSU->NumSuccs == 1) 1639 continue; 1640 // Avoid prescheduling to copies from virtual registers, which don't behave 1641 // like other nodes from the perspective of scheduling // heuristics. 1642 if (SDNode *N = SU->getNode()) 1643 if (N->getOpcode() == ISD::CopyFromReg && 1644 TargetRegisterInfo::isVirtualRegister 1645 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1646 continue; 1647 1648 // Perform checks on the successors of PredSU. 1649 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), 1650 EE = PredSU->Succs.end(); II != EE; ++II) { 1651 SUnit *PredSuccSU = II->getSUnit(); 1652 if (PredSuccSU == SU) continue; 1653 // If PredSU has another successor with no data successors, for 1654 // now don't attempt to choose either over the other. 1655 if (PredSuccSU->NumSuccs == 0) 1656 goto outer_loop_continue; 1657 // Don't break physical register dependencies. 1658 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 1659 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) 1660 goto outer_loop_continue; 1661 // Don't introduce graph cycles. 1662 if (scheduleDAG->IsReachable(SU, PredSuccSU)) 1663 goto outer_loop_continue; 1664 } 1665 1666 // Ok, the transformation is safe and the heuristics suggest it is 1667 // profitable. Update the graph. 1668 DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum 1669 << " next to PredSU #" << PredSU->NodeNum 1670 << " to guide scheduling in the presence of multiple uses\n"); 1671 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 1672 SDep Edge = PredSU->Succs[i]; 1673 assert(!Edge.isAssignedRegDep()); 1674 SUnit *SuccSU = Edge.getSUnit(); 1675 if (SuccSU != SU) { 1676 Edge.setSUnit(PredSU); 1677 scheduleDAG->RemovePred(SuccSU, Edge); 1678 scheduleDAG->AddPred(SU, Edge); 1679 Edge.setSUnit(SU); 1680 scheduleDAG->AddPred(SuccSU, Edge); 1681 --i; 1682 } 1683 } 1684 outer_loop_continue:; 1685 } 1686} 1687 1688/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1689/// it as a def&use operand. Add a pseudo control edge from it to the other 1690/// node (if it won't create a cycle) so the two-address one will be scheduled 1691/// first (lower in the schedule). If both nodes are two-address, favor the 1692/// one that has a CopyToReg use (more likely to be a loop induction update). 1693/// If both are two-address, but one is commutable while the other is not 1694/// commutable, favor the one that's not commutable. 1695template<class SF> 1696void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1697 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1698 SUnit *SU = &(*SUnits)[i]; 1699 if (!SU->isTwoAddress) 1700 continue; 1701 1702 SDNode *Node = SU->getNode(); 1703 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1704 continue; 1705 1706 unsigned Opc = Node->getMachineOpcode(); 1707 const TargetInstrDesc &TID = TII->get(Opc); 1708 unsigned NumRes = TID.getNumDefs(); 1709 unsigned NumOps = TID.getNumOperands() - NumRes; 1710 for (unsigned j = 0; j != NumOps; ++j) { 1711 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1712 continue; 1713 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1714 if (DU->getNodeId() == -1) 1715 continue; 1716 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1717 if (!DUSU) continue; 1718 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1719 E = DUSU->Succs.end(); I != E; ++I) { 1720 if (I->isCtrl()) continue; 1721 SUnit *SuccSU = I->getSUnit(); 1722 if (SuccSU == SU) 1723 continue; 1724 // Be conservative. Ignore if nodes aren't at roughly the same 1725 // depth and height. 1726 if (SuccSU->getHeight() < SU->getHeight() && 1727 (SU->getHeight() - SuccSU->getHeight()) > 1) 1728 continue; 1729 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 1730 // constrains whatever is using the copy, instead of the copy 1731 // itself. In the case that the copy is coalesced, this 1732 // preserves the intent of the pseudo two-address heurietics. 1733 while (SuccSU->Succs.size() == 1 && 1734 SuccSU->getNode()->isMachineOpcode() && 1735 SuccSU->getNode()->getMachineOpcode() == 1736 TargetOpcode::COPY_TO_REGCLASS) 1737 SuccSU = SuccSU->Succs.front().getSUnit(); 1738 // Don't constrain non-instruction nodes. 1739 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1740 continue; 1741 // Don't constrain nodes with physical register defs if the 1742 // predecessor can clobber them. 1743 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { 1744 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1745 continue; 1746 } 1747 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 1748 // these may be coalesced away. We want them close to their uses. 1749 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1750 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 1751 SuccOpc == TargetOpcode::INSERT_SUBREG || 1752 SuccOpc == TargetOpcode::SUBREG_TO_REG) 1753 continue; 1754 if ((!canClobber(SuccSU, DUSU) || 1755 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1756 (!SU->isCommutable && SuccSU->isCommutable)) && 1757 !scheduleDAG->IsReachable(SuccSU, SU)) { 1758 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" 1759 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 1760 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 1761 /*Reg=*/0, /*isNormalMemory=*/false, 1762 /*isMustAlias=*/false, 1763 /*isArtificial=*/true)); 1764 } 1765 } 1766 } 1767 } 1768} 1769 1770/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1771/// scheduling units. 1772template<class SF> 1773void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1774 SethiUllmanNumbers.assign(SUnits->size(), 0); 1775 1776 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1777 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1778} 1779 1780/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1781/// predecessors of the successors of the SUnit SU. Stop when the provided 1782/// limit is exceeded. 1783static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1784 unsigned Limit) { 1785 unsigned Sum = 0; 1786 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1787 I != E; ++I) { 1788 const SUnit *SuccSU = I->getSUnit(); 1789 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1790 EE = SuccSU->Preds.end(); II != EE; ++II) { 1791 SUnit *PredSU = II->getSUnit(); 1792 if (!PredSU->isScheduled) 1793 if (++Sum > Limit) 1794 return Sum; 1795 } 1796 } 1797 return Sum; 1798} 1799 1800 1801// Top down 1802bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1803 unsigned LPriority = SPQ->getNodePriority(left); 1804 unsigned RPriority = SPQ->getNodePriority(right); 1805 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1806 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1807 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1808 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1809 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1810 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1811 1812 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1813 return false; 1814 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1815 return true; 1816 1817 if (LIsFloater) 1818 LBonus -= 2; 1819 if (RIsFloater) 1820 RBonus -= 2; 1821 if (left->NumSuccs == 1) 1822 LBonus += 2; 1823 if (right->NumSuccs == 1) 1824 RBonus += 2; 1825 1826 if (LPriority+LBonus != RPriority+RBonus) 1827 return LPriority+LBonus < RPriority+RBonus; 1828 1829 if (left->getDepth() != right->getDepth()) 1830 return left->getDepth() < right->getDepth(); 1831 1832 if (left->NumSuccsLeft != right->NumSuccsLeft) 1833 return left->NumSuccsLeft > right->NumSuccsLeft; 1834 1835 assert(left->NodeQueueId && right->NodeQueueId && 1836 "NodeQueueId cannot be zero"); 1837 return (left->NodeQueueId > right->NodeQueueId); 1838} 1839 1840//===----------------------------------------------------------------------===// 1841// Public Constructor Functions 1842//===----------------------------------------------------------------------===// 1843 1844llvm::ScheduleDAGSDNodes * 1845llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1846 const TargetMachine &TM = IS->TM; 1847 const TargetInstrInfo *TII = TM.getInstrInfo(); 1848 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1849 1850 BURegReductionPriorityQueue *PQ = 1851 new BURegReductionPriorityQueue(*IS->MF, true, TII, TRI, 0); 1852 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); 1853 PQ->setScheduleDAG(SD); 1854 return SD; 1855} 1856 1857llvm::ScheduleDAGSDNodes * 1858llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1859 const TargetMachine &TM = IS->TM; 1860 const TargetInstrInfo *TII = TM.getInstrInfo(); 1861 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1862 1863 TDRegReductionPriorityQueue *PQ = 1864 new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 1865 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ); 1866 PQ->setScheduleDAG(SD); 1867 return SD; 1868} 1869 1870llvm::ScheduleDAGSDNodes * 1871llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1872 const TargetMachine &TM = IS->TM; 1873 const TargetInstrInfo *TII = TM.getInstrInfo(); 1874 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1875 1876 SrcRegReductionPriorityQueue *PQ = 1877 new SrcRegReductionPriorityQueue(*IS->MF, true, TII, TRI, 0); 1878 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); 1879 PQ->setScheduleDAG(SD); 1880 return SD; 1881} 1882 1883llvm::ScheduleDAGSDNodes * 1884llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1885 const TargetMachine &TM = IS->TM; 1886 const TargetInstrInfo *TII = TM.getInstrInfo(); 1887 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1888 const TargetLowering *TLI = &IS->getTargetLowering(); 1889 1890 HybridBURRPriorityQueue *PQ = 1891 new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, 1892 (RegPressureAware ? TLI : 0)); 1893 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); 1894 PQ->setScheduleDAG(SD); 1895 return SD; 1896} 1897