ScheduleDAGInstrs.cpp revision 68675c6c5b173021807e4e12cd250eeba63f6d0d
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
    IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT),
    FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObject.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}

/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
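    // For example (illustrative IR, not taken from the original comments):
    //   %i  = ptrtoint i8* %base to i64
    //   %j  = add i64 %i, 8
    //   %p2 = inttoptr i64 %j to i8*
    // This loop and getUnderlyingObjectFromInt bounce control back and
    // forth until the search bottoms out at %base.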
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
  LoopRegs.Deps.clear();
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch())
      LoopRegs.VisitLoop(ML);
}

void ScheduleDAGInstrs::finishBlock() {
  // Nothing to do.
}

/// Initialize the map with the number of registers.
void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
  PhysRegSet.setUniverse(Limit);
  SUnits.resize(Limit);
}

/// Clear the map without deallocating storage.
void Reg2SUnitsMap::clear() {
  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
    SUnits[*I].clear();
  }
  PhysRegSet.clear();
}

/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned endcount) {
  BB = bb;
  RegionBegin = begin;
  RegionEnd = end;
  EndIndex = endcount;

  // Check to see if the scheduler cares about latencies.
  UnitLatencies = forceUnitLatencies();

  ScheduleDAG::clearDAG();
}

/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
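/// For example (an illustration, not an exhaustive rule): on a fallthrough
/// exit, every register live into a successor block is recorded below as
/// "used" by ExitSU, so any def of such a register inside the region gets a
/// data edge to ExitSU carrying its full latency and cannot be scheduled at
/// the very bottom of the region.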
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(&ExitSU);
      else
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    SmallSet<unsigned, 8> Seen;
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (Seen.insert(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}

/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
                                           const MachineOperand &MO) {
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<SUnit*> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i];
      if (UseSU == SU)
        continue;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of
      // SpecialAddressLatency and just move this into
      // adjustSchedDependency for the targets that care about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
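      // The SDep is created const and cast back to non-const below only so
      // that computeOperandLatency and adjustSchedDependency can refine its
      // latency in place before the edge is added to UseSU.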
      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
      }
      UseSU->addPred(dep);
    }
  }
}

/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    std::vector<SUnit *> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i];
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  if (!MO.isDef()) {
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses[MO.getReg()].push_back(SU);
  }
  else {
    addPhysRegDataDeps(SU, MO);

    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's defs.
    std::vector<SUnit *> &DefList = Defs[MO.getReg()];

    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
    if (!UnitLatencies && DefList.empty()) {
      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
      if (I != LoopRegs.Deps.end()) {
        const MachineOperand *UseMO = I->second.first;
        unsigned Count = I->second.second;
        const MachineInstr *UseMI = UseMO->getParent();
        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        const TargetSubtargetInfo &ST =
          TM.getSubtarget<TargetSubtargetInfo>();
        unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
        // TODO: If we knew the total depth of the region here, we could
        // handle the case where the whole loop is inside the region but
        // is large enough that the isScheduleHigh trick isn't needed.
        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
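          // If the loop-carried use lives in a different block, the loop
          // body extends past this region, so the artificial ExitSU edge
          // below backschedules this def toward the top; otherwise the
          // whole loop is inside the region and the isScheduleHigh hint
          // is used instead.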
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
            // scheduling region.
            Latency -= std::min(Latency, Count);
            // Add the artificial edge.
            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                /*Reg=*/0, /*isNormalMemory=*/false,
                                /*isMustAlias=*/false,
                                /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
            // TODO: Recursively mark data-edge predecessors as
            //       isScheduleHigh too.
            SU->isScheduleHigh = true;
          }
        }
        LoopRegs.Deps.erase(I);
      }
    }

    // Clear this register's use list.
    if (Uses.contains(MO.getReg()))
      Uses[MO.getReg()].clear();

    if (!MO.isDead())
      DefList.clear();

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
      while (!DefList.empty() && DefList.back()->isCall)
        DefList.pop_back();
    }
    // Defs are pushed in the order they are visited and never reordered.
    DefList.push_back(SU);
  }
}

/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // SSA defs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                  DefSU->getInstr());
      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
    }
    DefI->SU = SU;
  }
}

/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Look up this operand's reaching definition.
  assert(LIS && "vreg dependencies requires LiveIntervals");
  SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
  LiveInterval *LI = &LIS->getInterval(Reg);
  VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  // Phis and other noninstructions (after coalescing) have a NULL Def.
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      //
      // TODO: Handle "special" address latencies cleanly.
      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information,
        // then allow the target to perform its own adjustments.
        computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      }
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler
/// may hang onto SUnit pointers. We may relax this in the future by using SUnit
/// IDs instead of pointers.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = newSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      computeLatency(SU);
  }
}

void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setRegLimit(TRI->getNumRegs());
  Uses.setRegLimit(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  // FIXME: Allow SparseSet to reserve space for the creation of virtual
  // registers during scheduling. Don't artificially inflate the Universe
  // because we want to assert that vregs are not created during DAG building.
  VRegDefs.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }

    assert(!MI->isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass)
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
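      // Flush the "known object" state: every tracked memory def and use
      // (all of which follow this instruction in the block) gets an order
      // edge from this SU -- the non-alias maps here, the alias maps after
      // the new_alias_chain label -- and this SU becomes the new barrier.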
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
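        // A store whose underlying object cannot be identified may alias
        // anything, so reuse the barrier logic above: chain all possibly
        // aliasing references through this SU.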
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
  MISUnitMap.clear();
}

void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}

void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
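      // Redo the def search allowing overlapping registers: for the example
      // above this finds the explicit %D6 def rather than the %Q3 imp-def,
      // giving getOperandLatency an operand index it can map to a real
      // itinerary entry.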
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, then it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// may contain multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}
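
// A minimal sketch of the driver protocol the interfaces above assume, for a
// block containing a single scheduling region. Illustrative only (compiled
// out by the preprocessor guard); the function and variable names here are
// hypothetical, not part of this file or of any particular LLVM pass.
#if 0
static void scheduleRegion(ScheduleDAGInstrs &Scheduler,
                           MachineBasicBlock *MBB,
                           MachineBasicBlock::iterator RegionBegin,
                           MachineBasicBlock::iterator RegionEnd,
                           unsigned RemainingCount, AliasAnalysis *AA) {
  Scheduler.startBlock(MBB);       // Per-block setup, e.g. loop-carried deps.
  Scheduler.enterRegion(MBB, RegionBegin, RegionEnd, RemainingCount);
  Scheduler.buildSchedGraph(AA);   // Create SUnits and all dependence edges.
  // ... run a scheduling algorithm over Scheduler.SUnits here ...
  Scheduler.exitRegion();
  Scheduler.finishBlock();
}
#endif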