ScheduleDAGInstrs.cpp revision 702d489a959b202ab06ce4bafa0bcb1fbfd2c3e4
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag),
    LIS(lis), UnitLatencies(false), LoopRegs(MLI, MDT), FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");
}

/// Run - perform scheduling.
///
void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
                            MachineBasicBlock::iterator begin,
                            MachineBasicBlock::iterator end,
                            unsigned endcount) {
  BB = bb;
  Begin = begin;
  InsertPosIndex = endcount;

  // Check to see if the scheduler cares about latencies.
  UnitLatencies = ForceUnitLatencies();

  ScheduleDAG::Run(bb, end);
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
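      // A hedged, illustrative example (not taken from any real test case):
      // given IR along the lines of
      //   %i   = ptrtoint i8* %base to i64
      //   %off = mul i64 %idx, 4
      //   %sum = add i64 %i, %off
      //   %p   = inttoptr i64 %sum to i8*
      // walking up from %sum steps through the add to %i, and the ptrtoint
      // case above then hands %base back to the caller.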
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}

/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
  LoopRegs.Deps.clear();
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch())
      LoopRegs.VisitLoop(ML);
}

/// Initialize the map with the number of registers.
void ScheduleDAGInstrs::Reg2SUnitsMap::setRegLimit(unsigned Limit) {
  PhysRegSet.setUniverse(Limit);
  SUnits.resize(Limit);
}

/// Clear the map without deallocating storage.
void ScheduleDAGInstrs::Reg2SUnitsMap::clear() {
  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
    SUnits[*I].clear();
  }
  PhysRegSet.clear();
}

/// AddSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::AddSchedBarrierDeps() {
  MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(&ExitSU);
      else
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    SmallSet<unsigned, 8> Seen;
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (Seen.insert(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}

/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
                                           const MachineOperand &MO) {
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (const unsigned *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<SUnit*> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i];
      if (UseSU == SU)
        continue;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of
      // SpecialAddressLatency and just move this into
      // adjustSchedDependency for the targets that care about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
      }
      UseSU->addPred(dep);
    }
  }
}

/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
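///
/// A hedged, illustrative example (register names and opcodes are made up
/// for exposition):
///   %EAX<def> = MOV32ri 1          ; SU(a)
///   %ECX<def> = MOV32rr %EAX<kill> ; SU(b), reads EAX
///   %EAX<def> = MOV32ri 2          ; SU(c), redefines EAX
/// Visiting bottom-up, SU(c) ends up with an anti dependence on SU(b)
/// (latency 0) and an output dependence on SU(a), so the redefinition
/// cannot be scheduled above either of them.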
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (const unsigned *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    std::vector<SUnit *> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i];
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  if (!MO.isDef()) {
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses[MO.getReg()].push_back(SU);
  }
  else {
    addPhysRegDataDeps(SU, MO);

    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's defs.
    std::vector<SUnit *> &DefList = Defs[MO.getReg()];

    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
    if (!UnitLatencies && DefList.empty()) {
      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
      if (I != LoopRegs.Deps.end()) {
        const MachineOperand *UseMO = I->second.first;
        unsigned Count = I->second.second;
        const MachineInstr *UseMI = UseMO->getParent();
        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        const TargetSubtargetInfo &ST =
          TM.getSubtarget<TargetSubtargetInfo>();
        unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
        // TODO: If we knew the total depth of the region here, we could
        // handle the case where the whole loop is inside the region but
        // is large enough that the isScheduleHigh trick isn't needed.
        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
            // scheduling region.
            Latency -= std::min(Latency, Count);
            // Add the artificial edge.
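            // (Note, as a hedged aside: an "artificial" SDep is a pure
            // ordering edge with no real data or memory dependence behind
            // it; here it merely biases the scheduler to place this def
            // early so its result is ready when the loop wraps around.)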
            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                /*Reg=*/0, /*isNormalMemory=*/false,
                                /*isMustAlias=*/false,
                                /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
            // TODO: Recursively mark data-edge predecessors as
            //       isScheduleHigh too.
            SU->isScheduleHigh = true;
          }
        }
        LoopRegs.Deps.erase(I);
      }
    }

    // Clear this register's use list.
    if (Uses.contains(MO.getReg()))
      Uses[MO.getReg()].clear();

    if (!MO.isDead())
      DefList.clear();

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
      while (!DefList.empty() && DefList.back()->isCall)
        DefList.pop_back();
    }
    // Defs are pushed in the order they are visited and never reordered.
    DefList.push_back(SU);
  }
}

/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // SSA defs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                  DefSU->getInstr());
      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
    }
    DefI->SU = SU;
  }
}

/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Look up this operand's reaching definition.
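  // A brief sketch of the lookup below: LiveIntervals maps MI to a
  // SlotIndex, the value number (VNInfo) live immediately before that index
  // names the reaching def, and the def's instruction (when it has one)
  // supplies the data-dependence predecessor.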
  assert(LIS && "vreg dependencies require LiveIntervals");
  SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
  LiveInterval *LI = &LIS->getInterval(Reg);
  VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
  // VNI will be valid because MachineOperand::readsReg() is checked by the
  // caller.
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  // Phis and other noninstructions (after coalescing) have a NULL Def.
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      //
      // TODO: Handle "special" address latencies cleanly.
      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information,
        // then allow the target to perform its own adjustments.
        ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      }
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}

/// Create an SUnit for each real instruction, numbered in top-down
/// topological order. An instruction order of A < B implies that no edge
/// exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler
/// may hang onto SUnit pointers. We may relax this in the future by using
/// SUnit IDs instead of pointers.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = NewSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);
  }
}

void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
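  // (As a hedged note on the mechanism: DbgValues pairs each DBG_VALUE with
  // the instruction preceding it in the original order, so EmitSchedule can
  // splice every DBG_VALUE back in right after its paired instruction.)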
  DbgValues.clear();
  FirstDbgValue = NULL;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setRegLimit(TRI->getNumRegs());
  Uses.setRegLimit(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  // FIXME: Allow SparseSet to reserve space for the creation of virtual
  // registers during scheduling. Don't artificially inflate the Universe
  // because we want to assert that vregs are not created during DAG building.
  VRegDefs.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  AddSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }

    assert(!MI->isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass).
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known not to alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
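      // In outline: AliasChain is the most recent SU through which all
      // may-alias references are serialized; each barrier or unanalyzable
      // store becomes the new chain head, and every pending load and
      // tracked memory def/use gains an order edge to it.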
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order,
                                          TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise
          // dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
  MISUnitMap.clear();
}

void ScheduleDAGInstrs::FinishBlock() {
  // Nothing to do.
}

void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}

void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
  Begin = InsertPos;

  // If the first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue)
    BB->splice(InsertPos, BB, FirstDbgValue);

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    if (SUnit *SU = Sequence[i])
      BB->splice(InsertPos, BB, SU->getInstr());
    else
      // Null SUnit* is a noop.
      EmitNoop();

    // Update the Begin iterator, as the first instruction in the block
    // may have been scheduled later.
    if (i == 0)
      Begin = prior(InsertPos);
  }

  // Reinsert any remaining debug_values.
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    BB->splice(++OrigPrevMI, BB, DbgValue);
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
  return BB;
}
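
// A hedged usage sketch, not a definitive driver: a concrete scheduler (a
// subclass that implements Schedule(), e.g. the post-RA list scheduler)
// typically drives this class per block roughly as follows:
//
//   Scheduler.StartBlock(MBB);                  // prime loop-carried deps
//   Scheduler.Run(MBB, RegionBegin, RegionEnd,  // Schedule() builds the DAG
//                 EndCount);                    // via BuildSchedGraph(AA)
//   Scheduler.EmitSchedule();                   // splice MIs back in order
//   Scheduler.FinishBlock();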