ScheduleDAGInstrs.cpp revision d3a7486ef351697450cfe87b6cce82a3eb906874
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
    IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT),
    FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}
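// Illustrative note (not part of the original source): the IR shape that
// getUnderlyingObjectFromInt (above) and getUnderlyingObject (below) look
// through is, for example:
//
//   %int = ptrtoint i32* %obj to i64
//   %sum = add i64 %int, 16
//   %ptr = inttoptr i64 %sum to i32*
//
// Starting from %ptr, GetUnderlyingObject stops at the inttoptr;
// getUnderlyingObjectFromInt then walks the add back to the ptrtoint and
// returns %obj, where the pointer-based search resumes.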
/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
  LoopRegs.Deps.clear();
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch())
      LoopRegs.VisitLoop(ML);
}

void ScheduleDAGInstrs::finishBlock() {
  // Nothing to do.
}

/// Initialize the map with the number of registers.
void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
  PhysRegSet.setUniverse(Limit);
  SUnits.resize(Limit);
}

/// Clear the map without deallocating storage.
void Reg2SUnitsMap::clear() {
  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
    SUnits[*I].clear();
  }
  PhysRegSet.clear();
}

/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned endcount) {
  BB = bb;
  RegionBegin = begin;
  RegionEnd = end;
  EndIndex = endcount;
  MISUnitMap.clear();

  // Check to see if the scheduler cares about latencies.
  UnitLatencies = forceUnitLatencies();

  ScheduleDAG::clearDAG();
}

/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}
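// A sketch (assumed from the comments above, not a definitive driver) of the
// per-block calling convention a scheduling driver is expected to follow:
//
//   SchedDAG.startBlock(MBB);
//   // for each scheduling region [RegionBegin, RegionEnd) in MBB:
//   SchedDAG.enterRegion(MBB, RegionBegin, RegionEnd, EndCount);
//   SchedDAG.buildSchedGraph(AA); // may be called repeatedly per region
//   // ... run the scheduling heuristic over SUnits ...
//   SchedDAG.exitRegion();
//   // after all regions:
//   SchedDAG.finishBlock();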
/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(&ExitSU);
      else {
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
        addVRegUseDeps(&ExitSU, i);
      }
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    SmallSet<unsigned, 8> Seen;
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (Seen.insert(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}
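// Illustrative scenario (assumed register names): if this region falls
// through with %EAX live-in to a successor, the loop above records ExitSU on
// Uses[%EAX]. When the DAG walk later visits an instruction defining %EAX,
// addPhysRegDataDeps finds ExitSU on that use list and adds an edge from the
// def to ExitSU, keeping the def scheduled before the block exit.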
/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
                                           const MachineOperand &MO) {
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<SUnit*> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i];
      if (UseSU == SU)
        continue;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of SpecialAddressLatency and just
      // move this into adjustSchedDependency for the targets that care
      // about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
      }
      UseSU->addPred(dep);
    }
  }
}
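// Worked example (assumed x86 registers): suppose SU defines %EAX and an
// instruction below it in the region reads %AX. getOverlaps(%EAX) visits
// %AX, the reader's SUnit is on Uses[%AX], and the loop above adds a data
// edge from SU to that reader, with the latency adjusted by
// computeOperandLatency and adjustSchedDependency.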
/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    std::vector<SUnit *> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i];
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  if (!MO.isDef()) {
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses[MO.getReg()].push_back(SU);
  }
  else {
    addPhysRegDataDeps(SU, MO);

    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's defs.
    std::vector<SUnit *> &DefList = Defs[MO.getReg()];

    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
    if (!UnitLatencies && DefList.empty()) {
      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
      if (I != LoopRegs.Deps.end()) {
        const MachineOperand *UseMO = I->second.first;
        unsigned Count = I->second.second;
        const MachineInstr *UseMI = UseMO->getParent();
        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        const TargetSubtargetInfo &ST =
          TM.getSubtarget<TargetSubtargetInfo>();
        unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
        // TODO: If we knew the total depth of the region here, we could
        // handle the case where the whole loop is inside the region but
        // is large enough that the isScheduleHigh trick isn't needed.
        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
            // scheduling region.
            Latency -= std::min(Latency, Count);
            // Add the artificial edge.
            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                /*Reg=*/0, /*isNormalMemory=*/false,
                                /*isMustAlias=*/false,
                                /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
            // TODO: Recursively mark data-edge predecessors as
            //       isScheduleHigh too.
            SU->isScheduleHigh = true;
          }
        }
        LoopRegs.Deps.erase(I);
      }
    }

    // Clear this register's use list.
    if (Uses.contains(MO.getReg()))
      Uses[MO.getReg()].clear();

    if (!MO.isDead())
      DefList.clear();

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
      while (!DefList.empty() && DefList.back()->isCall)
        DefList.pop_back();
    }
    // Defs are pushed in the order they are visited and never reordered.
    DefList.push_back(SU);
  }
}

/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // SSA defs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                  DefSU->getInstr());
      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
    }
    DefI->SU = SU;
  }
}
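// For example (illustrative), given two defs of the same vreg in one region:
//   %vreg5<def> = A ...
//   %vreg5<def> = B ...
// the bottom-up walk records B in VRegDefs first; when A is visited it finds
// B there, adds an output edge from A to B with getOutputLatency, and then
// takes over the entry so instructions above A see A as the nearest def.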
/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Lookup this operand's reaching definition.
  assert(LIS && "vreg dependencies requires LiveIntervals");
  SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
  LiveInterval *LI = &LIS->getInterval(Reg);
  VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  // Phis and other noninstructions (after coalescing) have a NULL Def.
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      //
      // TODO: Handle "special" address latencies cleanly.
      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information,
        // then allow the target to perform its own adjustments.
        computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      }
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. An instruction order of A < B implies that no edge exists from B
/// to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit
/// IDs instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = newSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      computeLatency(SU);
  }
}
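// Consequence of the ordering guarantee above (usage sketch, assumed): a
// scheduler can compare SUnit node numbers instead of iterating the MI list
// to decide which of two instructions came first in program order:
//
//   static bool isEarlier(const SUnit *A, const SUnit *B) {
//     return A->NodeNum < B->NodeNum; // valid only within one region
//   }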
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setRegLimit(TRI->getNumRegs());
  Uses.setRegLimit(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  // FIXME: Allow SparseSet to reserve space for the creation of virtual
  // registers during scheduling. Don't artificially inflate the Universe
  // because we want to assert that vregs are not created during DAG building.
  VRegDefs.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }

    assert(!MI->isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass).
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
}
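// Worked example (illustrative) of the chain logic above. For a region
// containing, in program order:
//   (1) store to %p    (2) load from %q (unknown object)    (3) call
// the bottom-up walk visits (3) first and makes it the BarrierChain and
// AliasChain. The load (2) has no underlying object, so it joins
// PendingLoads and gets an order edge to the barrier. The store (1) then
// takes the new_alias_chain path: it is ordered before the call with latency
// 0 and before the pending load with latency STORE_LOAD_LATENCY, modeling a
// possible store-to-load forwarding bypass.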
void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}

void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, then it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}