ScheduleDAGInstrs.cpp revision f7119393a97c2a10757084b6bc186380f8c19a73
1//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements the ScheduleDAGInstrs class, which implements re-scheduling 11// of MachineInstrs. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "sched-instrs" 16#include "llvm/CodeGen/MachineDominators.h" 17#include "llvm/CodeGen/MachineFunctionPass.h" 18#include "llvm/CodeGen/MachineLoopInfo.h" 19#include "llvm/CodeGen/MachineRegisterInfo.h" 20#include "llvm/CodeGen/ScheduleDAGInstrs.h" 21#include "llvm/CodeGen/PseudoSourceValue.h" 22#include "llvm/Target/TargetMachine.h" 23#include "llvm/Target/TargetInstrInfo.h" 24#include "llvm/Target/TargetRegisterInfo.h" 25#include "llvm/Target/TargetSubtarget.h" 26#include "llvm/Support/Compiler.h" 27#include "llvm/Support/Debug.h" 28#include "llvm/Support/raw_ostream.h" 29#include "llvm/ADT/SmallSet.h" 30#include <map> 31using namespace llvm; 32 33namespace { 34 class VISIBILITY_HIDDEN LoopDependencies { 35 const MachineLoopInfo &MLI; 36 const MachineDominatorTree &MDT; 37 38 public: 39 typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> > 40 LoopDeps; 41 LoopDeps Deps; 42 43 LoopDependencies(const MachineLoopInfo &mli, 44 const MachineDominatorTree &mdt) : 45 MLI(mli), MDT(mdt) {} 46 47 void VisitLoop(const MachineLoop *Loop) { 48 Deps.clear(); 49 MachineBasicBlock *Header = Loop->getHeader(); 50 SmallSet<unsigned, 8> LoopLiveIns; 51 for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), 52 LE = Header->livein_end(); LI != LE; ++LI) 53 LoopLiveIns.insert(*LI); 54 55 const MachineDomTreeNode *Node = MDT.getNode(Header); 56 const MachineBasicBlock *MBB = Node->getBlock(); 57 assert(Loop->contains(MBB) && 58 "Loop does not contain header!"); 59 VisitRegion(Node, MBB, Loop, LoopLiveIns); 60 } 61 62 private: 63 void VisitRegion(const MachineDomTreeNode *Node, 64 const MachineBasicBlock *MBB, 65 const MachineLoop *Loop, 66 const SmallSet<unsigned, 8> &LoopLiveIns) { 67 unsigned Count = 0; 68 for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); 69 I != E; ++I, ++Count) { 70 const MachineInstr *MI = I; 71 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 72 const MachineOperand &MO = MI->getOperand(i); 73 if (!MO.isReg() || !MO.isUse()) 74 continue; 75 unsigned MOReg = MO.getReg(); 76 if (LoopLiveIns.count(MOReg)) 77 Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); 78 } 79 } 80 81 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 82 for (std::vector<MachineDomTreeNode*>::const_iterator I = 83 Children.begin(), E = Children.end(); I != E; ++I) { 84 const MachineDomTreeNode *ChildNode = *I; 85 MachineBasicBlock *ChildBlock = ChildNode->getBlock(); 86 if (Loop->contains(ChildBlock)) 87 VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns); 88 } 89 } 90 }; 91} 92 93ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, 94 const MachineLoopInfo &mli, 95 const MachineDominatorTree &mdt) 96 : ScheduleDAG(mf), MLI(mli), MDT(mdt) {} 97 98void ScheduleDAGInstrs::BuildSchedGraph() { 99 SUnits.reserve(BB->size()); 100 101 // We build scheduling units by walking a block's instruction list from bottom 102 // to top. 103 104 // Remember where a generic side-effecting instruction is as we procede. If 105 // ChainMMO is null, this is assumed to have arbitrary side-effects. If 106 // ChainMMO is non-null, then Chain makes only a single memory reference. 107 SUnit *Chain = 0; 108 MachineMemOperand *ChainMMO = 0; 109 110 // Memory references to specific known memory locations are tracked so that 111 // they can be given more precise dependencies. 112 std::map<const Value *, SUnit *> MemDefs; 113 std::map<const Value *, std::vector<SUnit *> > MemUses; 114 115 // If we have an SUnit which is representing a terminator instruction, we 116 // can use it as a place-holder successor for inter-block dependencies. 117 SUnit *Terminator = 0; 118 119 // Terminators can perform control transfers, we we need to make sure that 120 // all the work of the block is done before the terminator. Labels can 121 // mark points of interest for various types of meta-data (eg. EH data), 122 // and we need to make sure nothing is scheduled around them. 123 SUnit *SchedulingBarrier = 0; 124 125 LoopDependencies LoopRegs(MLI, MDT); 126 127 // Track which regs are live into a loop, to help guide back-edge-aware 128 // scheduling. 129 SmallSet<unsigned, 8> LoopLiveInRegs; 130 if (MachineLoop *ML = MLI.getLoopFor(BB)) 131 if (BB == ML->getLoopLatch()) { 132 MachineBasicBlock *Header = ML->getHeader(); 133 for (MachineBasicBlock::livein_iterator I = Header->livein_begin(), 134 E = Header->livein_end(); I != E; ++I) 135 LoopLiveInRegs.insert(*I); 136 LoopRegs.VisitLoop(ML); 137 } 138 139 // Check to see if the scheduler cares about latencies. 140 bool UnitLatencies = ForceUnitLatencies(); 141 142 // Ask the target if address-backscheduling is desirable, and if so how much. 143 unsigned SpecialAddressLatency = 144 TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency(); 145 146 for (MachineBasicBlock::iterator MII = End, MIE = Begin; 147 MII != MIE; --MII) { 148 MachineInstr *MI = prior(MII); 149 const TargetInstrDesc &TID = MI->getDesc(); 150 SUnit *SU = NewSUnit(MI); 151 152 // Assign the Latency field of SU using target-provided information. 153 if (UnitLatencies) 154 SU->Latency = 1; 155 else 156 ComputeLatency(SU); 157 158 // Add register-based dependencies (data, anti, and output). 159 for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { 160 const MachineOperand &MO = MI->getOperand(j); 161 if (!MO.isReg()) continue; 162 unsigned Reg = MO.getReg(); 163 if (Reg == 0) continue; 164 165 assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); 166 std::vector<SUnit *> &UseList = Uses[Reg]; 167 std::vector<SUnit *> &DefList = Defs[Reg]; 168 // Optionally add output and anti dependencies. 169 // TODO: Using a latency of 1 here assumes there's no cost for 170 // reusing registers. 171 SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; 172 for (unsigned i = 0, e = DefList.size(); i != e; ++i) { 173 SUnit *DefSU = DefList[i]; 174 if (DefSU != SU && 175 (Kind != SDep::Output || !MO.isDead() || 176 !DefSU->getInstr()->registerDefIsDead(Reg))) 177 DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg)); 178 } 179 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 180 std::vector<SUnit *> &DefList = Defs[*Alias]; 181 for (unsigned i = 0, e = DefList.size(); i != e; ++i) { 182 SUnit *DefSU = DefList[i]; 183 if (DefSU != SU && 184 (Kind != SDep::Output || !MO.isDead() || 185 !DefSU->getInstr()->registerDefIsDead(Reg))) 186 DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias)); 187 } 188 } 189 190 if (MO.isDef()) { 191 // Add any data dependencies. 192 unsigned DataLatency = SU->Latency; 193 for (unsigned i = 0, e = UseList.size(); i != e; ++i) { 194 SUnit *UseSU = UseList[i]; 195 if (UseSU != SU) { 196 unsigned LDataLatency = DataLatency; 197 // Optionally add in a special extra latency for nodes that 198 // feed addresses. 199 // TODO: Do this for register aliases too. 200 if (SpecialAddressLatency != 0 && !UnitLatencies) { 201 MachineInstr *UseMI = UseSU->getInstr(); 202 const TargetInstrDesc &UseTID = UseMI->getDesc(); 203 int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); 204 assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); 205 if ((UseTID.mayLoad() || UseTID.mayStore()) && 206 (unsigned)RegUseIndex < UseTID.getNumOperands() && 207 UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) 208 LDataLatency += SpecialAddressLatency; 209 } 210 UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg)); 211 } 212 } 213 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 214 std::vector<SUnit *> &UseList = Uses[*Alias]; 215 for (unsigned i = 0, e = UseList.size(); i != e; ++i) { 216 SUnit *UseSU = UseList[i]; 217 if (UseSU != SU) 218 UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias)); 219 } 220 } 221 222 // If a def is going to wrap back around to the top of the loop, 223 // backschedule it. 224 // TODO: Blocks in loops without terminators can benefit too. 225 if (!UnitLatencies && Terminator && DefList.empty()) { 226 LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg); 227 if (I != LoopRegs.Deps.end()) { 228 const MachineOperand *UseMO = I->second.first; 229 unsigned Count = I->second.second; 230 const MachineInstr *UseMI = UseMO->getParent(); 231 unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); 232 const TargetInstrDesc &UseTID = UseMI->getDesc(); 233 // TODO: If we knew the total depth of the region here, we could 234 // handle the case where the whole loop is inside the region but 235 // is large enough that the isScheduleHigh trick isn't needed. 236 if (UseMOIdx < UseTID.getNumOperands()) { 237 // Currently, we only support scheduling regions consisting of 238 // single basic blocks. Check to see if the instruction is in 239 // the same region by checking to see if it has the same parent. 240 if (UseMI->getParent() != MI->getParent()) { 241 unsigned Latency = SU->Latency; 242 if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) 243 Latency += SpecialAddressLatency; 244 // This is a wild guess as to the portion of the latency which 245 // will be overlapped by work done outside the current 246 // scheduling region. 247 Latency -= std::min(Latency, Count); 248 // Add the artifical edge. 249 Terminator->addPred(SDep(SU, SDep::Order, Latency, 250 /*Reg=*/0, /*isNormalMemory=*/false, 251 /*isMustAlias=*/false, 252 /*isArtificial=*/true)); 253 } else if (SpecialAddressLatency > 0 && 254 UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { 255 // The entire loop body is within the current scheduling region 256 // and the latency of this operation is assumed to be greater 257 // than the latency of the loop. 258 // TODO: Recursively mark data-edge predecessors as 259 // isScheduleHigh too. 260 SU->isScheduleHigh = true; 261 } 262 } 263 LoopRegs.Deps.erase(I); 264 } 265 } 266 267 UseList.clear(); 268 if (!MO.isDead()) 269 DefList.clear(); 270 DefList.push_back(SU); 271 } else { 272 UseList.push_back(SU); 273 } 274 } 275 276 // Add chain dependencies. 277 // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable 278 // after stack slots are lowered to actual addresses. 279 // TODO: Use an AliasAnalysis and do real alias-analysis queries, and 280 // produce more precise dependence information. 281 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) { 282 new_chain: 283 // This is the conservative case. Add dependencies on all memory 284 // references. 285 if (Chain) 286 Chain->addPred(SDep(SU, SDep::Order, SU->Latency)); 287 Chain = SU; 288 for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) 289 PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency)); 290 PendingLoads.clear(); 291 for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(), 292 E = MemDefs.end(); I != E; ++I) { 293 I->second->addPred(SDep(SU, SDep::Order, SU->Latency)); 294 I->second = SU; 295 } 296 for (std::map<const Value *, std::vector<SUnit *> >::iterator I = 297 MemUses.begin(), E = MemUses.end(); I != E; ++I) { 298 for (unsigned i = 0, e = I->second.size(); i != e; ++i) 299 I->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency)); 300 I->second.clear(); 301 } 302 // See if it is known to just have a single memory reference. 303 MachineInstr *ChainMI = Chain->getInstr(); 304 const TargetInstrDesc &ChainTID = ChainMI->getDesc(); 305 if (!ChainTID.isCall() && !ChainTID.isTerminator() && 306 !ChainTID.hasUnmodeledSideEffects() && 307 ChainMI->hasOneMemOperand() && 308 !ChainMI->memoperands_begin()->isVolatile() && 309 ChainMI->memoperands_begin()->getValue()) 310 // We know that the Chain accesses one specific memory location. 311 ChainMMO = &*ChainMI->memoperands_begin(); 312 else 313 // Unknown memory accesses. Assume the worst. 314 ChainMMO = 0; 315 } else if (TID.mayStore()) { 316 if (MI->hasOneMemOperand() && 317 MI->memoperands_begin()->getValue() && 318 !MI->memoperands_begin()->isVolatile() && 319 isa<PseudoSourceValue>(MI->memoperands_begin()->getValue())) { 320 // A store to a specific PseudoSourceValue. Add precise dependencies. 321 const Value *V = MI->memoperands_begin()->getValue(); 322 // Handle the def in MemDefs, if there is one. 323 std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V); 324 if (I != MemDefs.end()) { 325 I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0, 326 /*isNormalMemory=*/true)); 327 I->second = SU; 328 } else { 329 MemDefs[V] = SU; 330 } 331 // Handle the uses in MemUses, if there are any. 332 std::map<const Value *, std::vector<SUnit *> >::iterator J = 333 MemUses.find(V); 334 if (J != MemUses.end()) { 335 for (unsigned i = 0, e = J->second.size(); i != e; ++i) 336 J->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0, 337 /*isNormalMemory=*/true)); 338 J->second.clear(); 339 } 340 // Add a general dependence too, if needed. 341 if (Chain) 342 Chain->addPred(SDep(SU, SDep::Order, SU->Latency)); 343 } else 344 // Treat all other stores conservatively. 345 goto new_chain; 346 } else if (TID.mayLoad()) { 347 if (TII->isInvariantLoad(MI)) { 348 // Invariant load, no chain dependencies needed! 349 } else if (MI->hasOneMemOperand() && 350 MI->memoperands_begin()->getValue() && 351 !MI->memoperands_begin()->isVolatile() && 352 isa<PseudoSourceValue>(MI->memoperands_begin()->getValue())) { 353 // A load from a specific PseudoSourceValue. Add precise dependencies. 354 const Value *V = MI->memoperands_begin()->getValue(); 355 std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V); 356 if (I != MemDefs.end()) 357 I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0, 358 /*isNormalMemory=*/true)); 359 MemUses[V].push_back(SU); 360 361 // Add a general dependence too, if needed. 362 if (Chain && (!ChainMMO || 363 (ChainMMO->isStore() || ChainMMO->isVolatile()))) 364 Chain->addPred(SDep(SU, SDep::Order, SU->Latency)); 365 } else if (MI->hasVolatileMemoryRef()) { 366 // Treat volatile loads conservatively. Note that this includes 367 // cases where memoperand information is unavailable. 368 goto new_chain; 369 } else { 370 // A normal load. Just depend on the general chain. 371 if (Chain) 372 Chain->addPred(SDep(SU, SDep::Order, SU->Latency)); 373 PendingLoads.push_back(SU); 374 } 375 } 376 377 // Add chain edges from terminators and labels to ensure that no 378 // instructions are scheduled past them. 379 if (SchedulingBarrier && SU->Succs.empty()) 380 SchedulingBarrier->addPred(SDep(SU, SDep::Order, SU->Latency)); 381 // If we encounter a mid-block label, we need to go back and add 382 // dependencies on SUnits we've already processed to prevent the 383 // label from moving downward. 384 if (MI->isLabel()) 385 for (SUnit *I = SU; I != &SUnits[0]; --I) { 386 SUnit *SuccSU = SU-1; 387 SuccSU->addPred(SDep(SU, SDep::Order, SU->Latency)); 388 MachineInstr *SuccMI = SuccSU->getInstr(); 389 if (SuccMI->getDesc().isTerminator() || SuccMI->isLabel()) 390 break; 391 } 392 // If this instruction obstructs all scheduling, remember it. 393 if (TID.isTerminator() || MI->isLabel()) 394 SchedulingBarrier = SU; 395 // If this instruction is a terminator, remember it. 396 if (TID.isTerminator()) 397 Terminator = SU; 398 } 399 400 for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { 401 Defs[i].clear(); 402 Uses[i].clear(); 403 } 404 PendingLoads.clear(); 405} 406 407void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { 408 const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); 409 410 // Compute the latency for the node. We use the sum of the latencies for 411 // all nodes flagged together into this SUnit. 412 SU->Latency = 413 InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass()); 414 415 // Simplistic target-independent heuristic: assume that loads take 416 // extra time. 417 if (InstrItins.isEmpty()) 418 if (SU->getInstr()->getDesc().mayLoad()) 419 SU->Latency += 2; 420} 421 422void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { 423 SU->getInstr()->dump(); 424} 425 426std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { 427 std::string s; 428 raw_string_ostream oss(s); 429 SU->getInstr()->print(oss); 430 return oss.str(); 431} 432 433// EmitSchedule - Emit the machine code in scheduled order. 434MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { 435 // For MachineInstr-based scheduling, we're rescheduling the instructions in 436 // the block, so start by removing them from the block. 437 while (Begin != End) { 438 MachineBasicBlock::iterator I = Begin; 439 ++Begin; 440 BB->remove(I); 441 } 442 443 // Then re-insert them according to the given schedule. 444 for (unsigned i = 0, e = Sequence.size(); i != e; i++) { 445 SUnit *SU = Sequence[i]; 446 if (!SU) { 447 // Null SUnit* is a noop. 448 EmitNoop(); 449 continue; 450 } 451 452 BB->insert(End, SU->getInstr()); 453 } 454 455 return BB; 456} 457