// ScheduleDAGInstrs.cpp revision 3f23744df4809eba94284e601e81489212c974d4
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include <map>
using namespace llvm;

/// Construct a scheduler for basic block \p bb. Loop info and the dominator
/// tree are kept for use by clients; the base ScheduleDAG is initialized with
/// a null SelectionDAG (0) because this scheduler operates on MachineInstrs.
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb,
                                     const TargetMachine &tm,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt)
  : ScheduleDAG(0, bb, tm), MLI(mli), MDT(mdt) {}

/// BuildSchedUnits - Create an SUnit for each instruction in the current
/// basic block and connect them with register dependencies (data, anti,
/// output) and memory "Order" (chain) dependencies. Because the block is
/// walked from bottom to top, every bookkeeping list below holds SUnits for
/// instructions that come *later* in program order than the instruction
/// currently being visited; each edge is added as
/// LaterSU->addPred(SDep(EarlierSU, ...)).
void ScheduleDAGInstrs::BuildSchedUnits() {
  SUnits.clear();
  SUnits.reserve(BB->size());

  // We build scheduling units by walking a block's instruction list from bottom
  // to top.

  // Remember where defs and uses of each physical register are as we proceed.
  // NOTE(review): these are FirstVirtualRegister-many std::vectors allocated
  // on the stack, which can make this frame quite large on targets with big
  // register files — confirm this is acceptable for all hosts.
  std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister] = {};
  std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Remember where unknown loads are after the most recent unknown store
  // as we proceed.
  std::vector<SUnit *> PendingLoads;

  // Remember where a generic side-effecting instruction is as we proceed. If
  // ChainMMO is null, this is assumed to have arbitrary side-effects. If
  // ChainMMO is non-null, then Chain makes only a single memory reference.
  SUnit *Chain = 0;
  MachineMemOperand *ChainMMO = 0;

  // Memory references to specific known memory locations are tracked so that
  // they can be given more precise dependencies.
  std::map<const Value *, SUnit *> MemDefs;
  std::map<const Value *, std::vector<SUnit *> > MemUses;

  // Terminators can perform control transfers, so we need to make sure that
  // all the work of the block is done before the terminator.
  SUnit *Terminator = 0;

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Walk the instructions bottom-up: on the first iteration MII == end() and
  // prior(MII) is the last instruction; the loop stops after visiting the
  // first instruction (when MII reaches begin()).
  for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin();
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    const TargetInstrDesc &TID = MI->getDesc();
    SUnit *SU = NewSUnit(MI);

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      // This scheduler runs post-register-allocation only.
      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
      std::vector<SUnit *> &UseList = Uses[Reg];
      std::vector<SUnit *> &DefList = Defs[Reg];
      // Optionally add output and anti dependencies.
      // TODO: Using a latency of 1 here assumes there's no cost for
      //       reusing registers.
      // A use of Reg here is an anti-dependence against later defs of Reg;
      // a def of Reg here is an output dependence against later defs.
      SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
        SUnit *DefSU = DefList[i];
        // Skip an output edge only when both this def and the later def of
        // Reg are dead.
        if (DefSU != SU &&
            (Kind != SDep::Output || !MO.isDead() ||
             !DefSU->getInstr()->registerDefIsDead(Reg)))
          DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
      }
      // The same edges are needed against defs of any register aliasing Reg.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        // Shadows the outer DefList for the duration of this loop only.
        std::vector<SUnit *> &DefList = Defs[*Alias];
        for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
          SUnit *DefSU = DefList[i];
          // NOTE(review): this queries registerDefIsDead(Reg), but DefSU was
          // recorded as a def of *Alias, not Reg — a dead def of *Alias would
          // not be detected, so an extra (conservative) output edge may be
          // added. Confirm whether *Alias was intended here.
          if (DefSU != SU &&
              (Kind != SDep::Output || !MO.isDead() ||
               !DefSU->getInstr()->registerDefIsDead(Reg)))
            DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
        }
      }

      if (MO.isDef()) {
        // Add any data dependencies: later uses of Reg (and of any register
        // aliasing Reg) depend on this def, with this instruction's latency.
        unsigned DataLatency = SU->Latency;
        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
          SUnit *UseSU = UseList[i];
          if (UseSU != SU) {
            UseSU->addPred(SDep(SU, SDep::Data, DataLatency, Reg));
          }
        }
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          // Shadows the outer UseList for the duration of this loop only.
          std::vector<SUnit *> &UseList = Uses[*Alias];
          for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
            SUnit *UseSU = UseList[i];
            if (UseSU != SU)
              UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias));
          }
        }

        // This def kills all pending uses of Reg; non-dead defs also hide
        // earlier defs, so reset DefList to just this SUnit in that case.
        UseList.clear();
        if (!MO.isDead())
          DefList.clear();
        DefList.push_back(SU);
      } else {
        UseList.push_back(SU);
      }
    }

    // Add chain dependencies.
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    //       produce more precise dependence information.
    if (TID.isCall() || TID.isReturn() || TID.isBranch() ||
        TID.hasUnmodeledSideEffects()) {
    // The mayStore/mayLoad branches below jump here (via goto) for memory
    // references they cannot reason about precisely.
    new_chain:
      // This is the conservative case. Add dependencies on all memory
      // references, then make this SUnit the new Chain.
      if (Chain)
        Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
      Chain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
      PendingLoads.clear();
      // All tracked memory defs/uses now depend on the new chain node, and
      // the new chain node becomes the sole recorded def for each location.
      for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
           E = MemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
        I->second = SU;
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
           MemUses.begin(), E = MemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency));
        I->second.clear();
      }
      // See if it is known to just have a single memory reference.
      MachineInstr *ChainMI = Chain->getInstr();
      const TargetInstrDesc &ChainTID = ChainMI->getDesc();
      if (!ChainTID.isCall() && !ChainTID.isReturn() && !ChainTID.isBranch() &&
          !ChainTID.hasUnmodeledSideEffects() &&
          ChainMI->hasOneMemOperand() &&
          !ChainMI->memoperands_begin()->isVolatile() &&
          ChainMI->memoperands_begin()->getValue())
        // We know that the Chain accesses one specific memory location.
        ChainMMO = &*ChainMI->memoperands_begin();
      else
        // Unknown memory accesses. Assume the worst.
        ChainMMO = 0;
    } else if (TID.mayStore()) {
      if (MI->hasOneMemOperand() &&
          MI->memoperands_begin()->getValue() &&
          !MI->memoperands_begin()->isVolatile() &&
          isa<PseudoSourceValue>(MI->memoperands_begin()->getValue())) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        const Value *V = MI->memoperands_begin()->getValue();
        // Handle the def in MemDefs, if there is one: the later store to V
        // must follow this one, and this store becomes V's recorded def.
        std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
        if (I != MemDefs.end()) {
          I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          MemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any: later loads of V
        // must follow this store.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          MemUses.find(V);
        if (J != MemUses.end()) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
                                       /*isNormalMemory=*/true));
          J->second.clear();
        }
        // Add a general dependence too, if needed.
        if (Chain)
          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
      } else
        // Treat all other stores conservatively.
        goto new_chain;
    } else if (TID.mayLoad()) {
      if (TII->isInvariantLoad(MI)) {
        // Invariant load, no chain dependencies needed!
      } else if (MI->hasOneMemOperand() &&
                 MI->memoperands_begin()->getValue() &&
                 !MI->memoperands_begin()->isVolatile() &&
                 isa<PseudoSourceValue>(MI->memoperands_begin()->getValue())) {
        // A load from a specific PseudoSourceValue. Add precise dependencies.
        const Value *V = MI->memoperands_begin()->getValue();
        // A later store to V (if any) must follow this load.
        std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
        if (I != MemDefs.end())
          I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
        MemUses[V].push_back(SU);

        // Add a general dependence too, if needed. If the chain node is
        // known to be a single non-volatile load, loads can reorder with it
        // and no edge is required.
        if (Chain && (!ChainMMO ||
                      (ChainMMO->isStore() || ChainMMO->isVolatile())))
          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
      } else if (MI->hasVolatileMemoryRef()) {
        // Treat volatile loads conservatively. Note that this includes
        // cases where memoperand information is unavailable.
        goto new_chain;
      } else {
        // A normal load. Just depend on the general chain, and remember it
        // so a later (i.e. earlier-visited-above) store can order after it.
        if (Chain)
          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
        PendingLoads.push_back(SU);
      }
    }

    // Add chain edges from the terminator to ensure that all the work of the
    // block is completed before any control transfers. Only SUnits with no
    // successors so far get an explicit edge; the rest are presumably ordered
    // before the terminator through their existing successors — verify.
    if (Terminator && SU->Succs.empty())
      Terminator->addPred(SDep(SU, SDep::Order, SU->Latency));
    if (TID.isTerminator() || MI->isLabel())
      Terminator = SU;
  }
}

/// ComputeLatency - Set SU's Latency from the target's instruction
/// itinerary; when no itinerary data is available, fall back to a simple
/// heuristic that charges loads extra time.
void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();

  // Look up the latency for this instruction's scheduling class in the
  // target itinerary.
  SU->Latency =
    InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass());

  // Simplistic target-independent heuristic: assume that loads take
  // extra time.
  if (InstrItins.isEmpty())
    if (SU->getInstr()->getDesc().mayLoad())
      SU->Latency += 2;
}

/// dumpNode - Dump the MachineInstr underlying the given SUnit.
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

/// getGraphNodeLabel - Return a label for the SUnit (its instruction printed
/// to a string), for use in DAG visualization.
std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  SU->getInstr()->print(oss);
  return oss.str();
}

// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
  // For MachineInstr-based scheduling, we're rescheduling the instructions in
  // the block, so start by removing them from the block.
  while (!BB->empty())
    BB->remove(BB->begin());

  // Re-insert the instructions in Sequence order.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    SUnit *SU = Sequence[i];
    if (!SU) {
      // Null SUnit* is a noop.
      EmitNoop();
      continue;
    }

    BB->push_back(SU->getInstr());
  }

  return BB;
}