ScheduleDAGInstrs.cpp revision 91203cf87b8a72227c78a9eeae8bb744dfcc4ef0
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {
  MFI = mf.getFrameInfo();
}

/// Run - perform scheduling.
///
void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
                            MachineBasicBlock::iterator begin,
                            MachineBasicBlock::iterator end,
                            unsigned endcount) {
  BB = bb;
  Begin = begin;
  InsertPosIndex = endcount;

  ScheduleDAG::Run(bb, end);
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!");
  } while (1);
}
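// For illustration, the kind of IR these helpers look through (a
// hypothetical example, not taken from any test in the tree):
//
//   %i = ptrtoint i8* %obj to i64
//   %a = add i64 %i, 16
//   %p = inttoptr i64 %a to i8*
//
// Starting from %p, getUnderlyingObject below recognizes the inttoptr and
// hands the integer expression to getUnderlyingObjectFromInt, which walks
// the add back to the ptrtoint and resumes the search at %obj.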
/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = V->getUnderlyingObject();
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!isa<PointerType>(O->getType()))
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI) {
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch()) {
      MachineBasicBlock *Header = ML->getHeader();
      for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
           E = Header->livein_end(); I != E; ++I)
        LoopLiveInRegs.insert(*I);
      LoopRegs.VisitLoop(ML);
    }
}

void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
  // We'll be allocating one SUnit for each instruction, plus one for
  // the region exit node.
  SUnits.reserve(BB->size());

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  // If ChainMMO is null, this is assumed to have arbitrary side-effects.
  // If ChainMMO is non-null, then Chain makes only a single memory reference.
  SUnit *Chain = 0;
  MachineMemOperand *ChainMMO = 0;

  // Memory references to specific known memory locations are tracked so that
  // they can be given more precise dependencies.
  std::map<const Value *, SUnit *> MemDefs;
  std::map<const Value *, std::vector<SUnit *> > MemUses;

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();

  // Walk the list of instructions, from bottom moving up.
  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    const TargetInstrDesc &TID = MI->getDesc();
    assert(!TID.isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");
    // Create the SUnit for this MI.
    SUnit *SU = NewSUnit(MI);

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);
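    // Bookkeeping note (informal): in this bottom-up walk, Defs[Reg] and
    // Uses[Reg] already hold the SUnits below the current instruction that
    // define or use Reg. The current SUnit therefore becomes a predecessor
    // of those recorded SUnits: a data edge to each recorded use when this
    // operand is a def, an output edge to each recorded def when it is a
    // def, and an anti edge to each recorded def when it is a use.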
    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
      std::vector<SUnit *> &UseList = Uses[Reg];
      std::vector<SUnit *> &DefList = Defs[Reg];
      // Optionally add output and anti dependencies. For anti
      // dependencies we use a latency of 0 because for a multi-issue
      // target we want to allow the defining instruction to issue
      // in the same cycle as the using instruction.
      // TODO: Using a latency of 1 here for output dependencies assumes
      //       there's no cost for reusing registers.
      SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
      unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
        SUnit *DefSU = DefList[i];
        if (DefSU != SU &&
            (Kind != SDep::Output || !MO.isDead() ||
             !DefSU->getInstr()->registerDefIsDead(Reg)))
          DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
      }
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        std::vector<SUnit *> &DefList = Defs[*Alias];
        for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
          SUnit *DefSU = DefList[i];
          if (DefSU != SU &&
              (Kind != SDep::Output || !MO.isDead() ||
               !DefSU->getInstr()->registerDefIsDead(*Alias)))
            DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
        }
      }

      if (MO.isDef()) {
        // Add any data dependencies.
        unsigned DataLatency = SU->Latency;
        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
          SUnit *UseSU = UseList[i];
          if (UseSU != SU) {
            unsigned LDataLatency = DataLatency;
            // Optionally add in a special extra latency for nodes that
            // feed addresses.
            // TODO: Do this for register aliases too.
            // TODO: Perhaps we should get rid of SpecialAddressLatency and
            //       just move this into adjustSchedDependency for the
            //       targets that care about it.
            if (SpecialAddressLatency != 0 && !UnitLatencies) {
              MachineInstr *UseMI = UseSU->getInstr();
              const TargetInstrDesc &UseTID = UseMI->getDesc();
              int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
              assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
              if ((UseTID.mayLoad() || UseTID.mayStore()) &&
                  (unsigned)RegUseIndex < UseTID.getNumOperands() &&
                  UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
                LDataLatency += SpecialAddressLatency;
            }
            // Adjust the dependence latency using operand def/use
            // information (if any), and then allow the target to
            // perform its own adjustments.
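            // Note (informal): 'dep' below is a temporary bound to a const
            // reference; the casts strip constness so ComputeOperandLatency
            // and adjustSchedDependency can update its latency in place
            // before the edge is recorded with addPred.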
            const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
            if (!UnitLatencies) {
              ComputeOperandLatency(SU, UseSU, (SDep &)dep);
              ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
            }
            UseSU->addPred(dep);
          }
        }
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          std::vector<SUnit *> &UseList = Uses[*Alias];
          for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
            SUnit *UseSU = UseList[i];
            if (UseSU != SU) {
              const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
              if (!UnitLatencies) {
                ComputeOperandLatency(SU, UseSU, (SDep &)dep);
                ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
              }
              UseSU->addPred(dep);
            }
          }
        }

        // If a def is going to wrap back around to the top of the loop,
        // backschedule it.
        if (!UnitLatencies && DefList.empty()) {
          LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
          if (I != LoopRegs.Deps.end()) {
            const MachineOperand *UseMO = I->second.first;
            unsigned Count = I->second.second;
            const MachineInstr *UseMI = UseMO->getParent();
            unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
            const TargetInstrDesc &UseTID = UseMI->getDesc();
            // TODO: If we knew the total depth of the region here, we could
            // handle the case where the whole loop is inside the region but
            // is large enough that the isScheduleHigh trick isn't needed.
            if (UseMOIdx < UseTID.getNumOperands()) {
              // Currently, we only support scheduling regions consisting of
              // single basic blocks. Check to see if the instruction is in
              // the same region by checking to see if it has the same parent.
              if (UseMI->getParent() != MI->getParent()) {
                unsigned Latency = SU->Latency;
                if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
                  Latency += SpecialAddressLatency;
                // This is a wild guess as to the portion of the latency which
                // will be overlapped by work done outside the current
                // scheduling region.
                Latency -= std::min(Latency, Count);
                // Add the artificial edge.
                ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                    /*Reg=*/0, /*isNormalMemory=*/false,
                                    /*isMustAlias=*/false,
                                    /*isArtificial=*/true));
              } else if (SpecialAddressLatency > 0 &&
                         UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
                // The entire loop body is within the current scheduling region
                // and the latency of this operation is assumed to be greater
                // than the latency of the loop.
                // TODO: Recursively mark data-edge predecessors as
                //       isScheduleHigh too.
                SU->isScheduleHigh = true;
              }
            }
            LoopRegs.Deps.erase(I);
          }
        }

        UseList.clear();
        if (!MO.isDead())
          DefList.clear();
        DefList.push_back(SU);
      } else {
        UseList.push_back(SU);
      }
    }
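    // Informal summary of the chain scheme that follows: calls and
    // instructions with unmodeled side effects become a new "Chain" barrier
    // that everything outstanding must order against; stores to a known
    // object get precise edges through MemDefs/MemUses; loads from a known
    // object depend only on prior stores to the same object (plus the
    // barrier when it may write memory); loads with no usable memoperand
    // sit in PendingLoads and are conservatively made to depend on every
    // store or barrier seen above them.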
    // Add chain dependencies.
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
    if (TID.isCall() || TID.hasUnmodeledSideEffects()) {
    new_chain:
      // This is the conservative case. Add dependencies on all memory
      // references.
      if (Chain)
        Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
      Chain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
      PendingLoads.clear();
      for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
           E = MemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
        I->second = SU;
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
           MemUses.begin(), E = MemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency));
        I->second.clear();
      }
      // See if it is known to just have a single memory reference.
      MachineInstr *ChainMI = Chain->getInstr();
      const TargetInstrDesc &ChainTID = ChainMI->getDesc();
      if (!ChainTID.isCall() &&
          !ChainTID.hasUnmodeledSideEffects() &&
          ChainMI->hasOneMemOperand() &&
          !(*ChainMI->memoperands_begin())->isVolatile() &&
          (*ChainMI->memoperands_begin())->getValue())
        // We know that the Chain accesses one specific memory location.
        ChainMMO = *ChainMI->memoperands_begin();
      else
        // Unknown memory accesses. Assume the worst.
        ChainMMO = 0;
    } else if (TID.mayStore()) {
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Handle the def in MemDefs, if there is one.
        std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
        if (I != MemDefs.end()) {
          I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          MemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          MemUses.find(V);
        if (J != MemUses.end()) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
                                       /*isNormalMemory=*/true));
          J->second.clear();
        }
        // Add dependencies from all the PendingLoads, since without
        // memoperands we must assume they alias anything.
        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
          PendingLoads[k]->addPred(SDep(SU, SDep::Order, SU->Latency));
        // Add a general dependence too, if needed.
        if (Chain)
          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
      } else
        // Treat all other stores conservatively.
        goto new_chain;
    } else if (TID.mayLoad()) {
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else if (const Value *V = getUnderlyingObjectForInstr(MI, MFI)) {
        // A load from a specific PseudoSourceValue. Add precise dependencies.
        std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
        if (I != MemDefs.end())
          I->second->addPred(SDep(SU, SDep::Order, SU->Latency, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
        MemUses[V].push_back(SU);

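        // A load only needs an edge to the chain barrier when the barrier
        // might write memory: if ChainMMO shows the barrier is itself a
        // single non-volatile load, two reads cannot conflict and the edge
        // is skipped.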
        // Add a general dependence too, if needed.
        if (Chain && (!ChainMMO ||
                      (ChainMMO->isStore() || ChainMMO->isVolatile())))
          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
      } else if (MI->hasVolatileMemoryRef()) {
        // Treat volatile loads conservatively. Note that this includes
        // cases where memoperand information is unavailable.
        goto new_chain;
      } else {
        // A normal load. Depend on the general chain, as well as on
        // all stores. In the absence of MachineMemOperand information,
        // we can't even assume that the load doesn't alias well-behaved
        // memory locations.
        if (Chain)
          Chain->addPred(SDep(SU, SDep::Order, SU->Latency));
        for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
             E = MemDefs.end(); I != E; ++I)
          I->second->addPred(SDep(SU, SDep::Order, SU->Latency));
        PendingLoads.push_back(SU);
      }
    }
  }

  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
    Defs[i].clear();
    Uses[i].clear();
  }
  PendingLoads.clear();
}

void ScheduleDAGInstrs::FinishBlock() {
  // Nothing to do.
}

void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();

  // Compute the latency for the node.
  SU->Latency =
    InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());

  // Simplistic target-independent heuristic: assume that loads take
  // extra time.
  if (InstrItins.isEmpty())
    if (SU->getInstr()->getDesc().mayLoad())
      SU->Latency += 2;
}

void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  if (InstrItins.isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    int DefCycle =
      InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx);
    if (DefCycle >= 0) {
      MachineInstr *UseMI = Use->getInstr();
      const unsigned UseClass = UseMI->getDesc().getSchedClass();

      // For all uses of the register, calculate the maximum latency.
      int Latency = -1;
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = InstrItins.getOperandCycle(UseClass, i);
        if (UseCycle >= 0)
          Latency = std::max(Latency, DefCycle - UseCycle + 1);
      }

      // If we found a latency, then replace the existing dependence latency.
      if (Latency >= 0)
        dep.setLatency(Latency);
    }
  }
}
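// Worked example for ComputeOperandLatency above (hypothetical itinerary
// values): if the defining instruction produces Reg in operand cycle 3
// (DefCycle = 3) and the using instruction reads it in operand cycle 1
// (UseCycle = 1), the edge latency becomes DefCycle - UseCycle + 1 = 3,
// taking the maximum over all operands of the user that read Reg.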
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::
EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
  // For MachineInstr-based scheduling, we're rescheduling the instructions
  // in the block, so start by removing them from the block.
  while (Begin != InsertPos) {
    MachineBasicBlock::iterator I = Begin;
    ++Begin;
    BB->remove(I);
  }

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    SUnit *SU = Sequence[i];
    if (!SU) {
      // Null SUnit* is a noop.
      EmitNoop();
      continue;
    }

    BB->insert(InsertPos, SU->getInstr());
  }

  // Update the Begin iterator, as the first instruction in the block
  // may have been scheduled later.
  if (!Sequence.empty())
    Begin = Sequence[0]->getInstr();

  return BB;
}