ScheduleDAGInstrs.cpp revision 47c144505b9be28ed22c626b3a407c11dba2fec5
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag),
    LIS(lis), UnitLatencies(false), LoopRegs(MLI, MDT), FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}

/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call GetUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
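    // As a hypothetical illustration (not from the original comments) of the
    // pattern handled here, a sequence such as:
    //   %i = ptrtoint i8* %obj to i64
    //   %a = add i64 %i, 16
    //   %p = inttoptr i64 %a to i8*
    // is walked back from %p through the add to the ptrtoint, so the search
    // can resume from %obj.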
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
  LoopRegs.Deps.clear();
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch())
      LoopRegs.VisitLoop(ML);
}

void ScheduleDAGInstrs::FinishBlock() {
  // Nothing to do.
}

/// Initialize the map with the number of registers.
void ScheduleDAGInstrs::Reg2SUnitsMap::setRegLimit(unsigned Limit) {
  PhysRegSet.setUniverse(Limit);
  SUnits.resize(Limit);
}

/// Clear the map without deallocating storage.
void ScheduleDAGInstrs::Reg2SUnitsMap::clear() {
  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
    SUnits[*I].clear();
  }
  PhysRegSet.clear();
}

/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned endcount) {
  BB = bb;
  Begin = begin;
  InsertPos = end;
  InsertPosIndex = endcount;

  // Check to see if the scheduler cares about latencies.
  UnitLatencies = ForceUnitLatencies();

  ScheduleDAG::clearDAG();
}

/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

/// AddSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
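///
/// For instance, when the region ends in a call, each register the call reads
/// is recorded below as used by ExitSU, so the instruction defining that
/// register receives an edge to ExitSU and stays scheduled before the call.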
void ScheduleDAGInstrs::AddSchedBarrierDeps() {
  MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(&ExitSU);
      else
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    SmallSet<unsigned, 8> Seen;
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (Seen.insert(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}

/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
                                           const MachineOperand &MO) {
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<SUnit*> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i];
      if (UseSU == SU)
        continue;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of
      // SpecialAddressLatency and just move this into
      // adjustSchedDependency for the targets that care about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
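      // Note the idiom below: the SDep temporary is bound to a const
      // reference and the constness is then cast away, so that
      // ComputeOperandLatency and adjustSchedDependency can update the
      // latency in place before the edge is copied into UseSU's
      // predecessor list.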
      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
      }
      UseSU->addPred(dep);
    }
  }
}

/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  // there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    std::vector<SUnit *> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i];
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  if (!MO.isDef()) {
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses[MO.getReg()].push_back(SU);
  }
  else {
    addPhysRegDataDeps(SU, MO);

    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's defs.
    std::vector<SUnit *> &DefList = Defs[MO.getReg()];

    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
    if (!UnitLatencies && DefList.empty()) {
      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
      if (I != LoopRegs.Deps.end()) {
        const MachineOperand *UseMO = I->second.first;
        unsigned Count = I->second.second;
        const MachineInstr *UseMI = UseMO->getParent();
        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        const TargetSubtargetInfo &ST =
          TM.getSubtarget<TargetSubtargetInfo>();
        unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
        // TODO: If we knew the total depth of the region here, we could
        // handle the case where the whole loop is inside the region but
        // is large enough that the isScheduleHigh trick isn't needed.
        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
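          // If the loop use lives outside the current region (a different
          // block), an artificial edge to ExitSU backschedules this def; if
          // the whole loop body is inside the region and the target cares
          // about address latency, the def is instead marked isScheduleHigh.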
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
            // scheduling region.
            Latency -= std::min(Latency, Count);
            // Add the artificial edge.
            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                /*Reg=*/0, /*isNormalMemory=*/false,
                                /*isMustAlias=*/false,
                                /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
            // TODO: Recursively mark data-edge predecessors as
            // isScheduleHigh too.
            SU->isScheduleHigh = true;
          }
        }
        LoopRegs.Deps.erase(I);
      }
    }

    // Clear this register's use list.
    if (Uses.contains(MO.getReg()))
      Uses[MO.getReg()].clear();

    if (!MO.isDead())
      DefList.clear();

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
      while (!DefList.empty() && DefList.back()->isCall)
        DefList.pop_back();
    }
    // Defs are pushed in the order they are visited and never reordered.
    DefList.push_back(SU);
  }
}

/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // SSA defs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                  DefSU->getInstr());
      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
    }
    DefI->SU = SU;
  }
}
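
// A note on the single-def test in addVRegDefDeps above: since the current
// operand is itself a def, llvm::next(MRI.def_begin(Reg)) == MRI.def_end()
// holds exactly when it is Reg's only definition, i.e. the vreg is still in
// SSA form and no output or anti dependence is possible.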

/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Lookup this operand's reaching definition.
  assert(LIS && "vreg dependencies require LiveIntervals");
  SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
  LiveInterval *LI = &LIS->getInterval(Reg);
  VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  // Phis and other noninstructions (after coalescing) have a NULL Def.
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      //
      // TODO: Handle "special" address latencies cleanly.
      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information,
        // then allow the target to perform its own adjustments.
        ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      }
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. An instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler
/// may hang onto SUnit pointers. We may relax this in the future by using SUnit
/// IDs instead of pointers.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = NewSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);
  }
}

void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
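  // For example (an illustrative case, not from the original comments): a
  // store to a spill slot whose PseudoSourceValue cannot alias IR values
  // lands in the NonAlias maps, so only same-location accesses and the
  // conservative barrier handling below order against it, while an ordinary
  // store through an IR pointer stays in the Alias maps.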
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildSchedGraph should update Defs/Uses");
  Defs.setRegLimit(TRI->getNumRegs());
  Uses.setRegLimit(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  // FIXME: Allow SparseSet to reserve space for the creation of virtual
  // registers during scheduling. Don't artificially inflate the Universe
  // because we want to assert that vregs are not created during DAG building.
  VRegDefs.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  AddSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }

    assert(!MI->isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for a true dependency of a Store followed by an
    // aliased Load... we estimate that with a single cycle of latency,
    // assuming the hardware will bypass).
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
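      // This instruction acts as a full memory barrier: every tracked memory
      // access, including the "known not to alias" ones, gets an order edge
      // to it, and the maps are reset so later-visited (earlier-in-block)
      // accesses chain through this node instead.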
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
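        // A store with no identifiable underlying object is handled like a
        // barrier on the aliasing side: jump back into the new_alias_chain
        // code above, making this store the new AliasChain.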
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between compares
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
  MISUnitMap.clear();
}

void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}

void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute the latency between the def of %D6/%D7 and
      // the use of %Q3 instead.
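      // Ask for a def operand that merely overlaps Reg rather than an exact
      // match; in the example above this finds the explicit %D6 def when Reg
      // is the implicitly defined super-register %Q3.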
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, then it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}