ScheduleDAGInstrs.cpp revision b4566a999970b514d7c6973d99e293a6625d3f70
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag),
    LIS(lis), UnitLatencies(false),
    Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
    LoopRegs(MLI, MDT), FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MF.getRegInfo().getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");
}

/// Run - perform scheduling.
///
void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
                            MachineBasicBlock::iterator begin,
                            MachineBasicBlock::iterator end,
                            unsigned endcount) {
  BB = bb;
  Begin = begin;
  InsertPosIndex = endcount;

  // Check to see if the scheduler cares about latencies.
  UnitLatencies = ForceUnitLatencies();

  ScheduleDAG::Run(bb, end);
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
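      // For illustration only (a hypothetical IR sequence, not taken from a
      // test case): given
      //   %pi = ptrtoint i8* %base to i64
      //   %ai = add i64 %pi, 16
      //   %p  = inttoptr i64 %ai to i8*
      // walking the add's non-constant operand, and handing the enclosing
      // ptrtoint back to the regular pointer walk, leads back to %base.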
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}

/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
  LoopRegs.Deps.clear();
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch())
      LoopRegs.VisitLoop(ML);
}

/// AddSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register def and use lists. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::AddSchedBarrierDeps() {
  MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(&ExitSU);
      else
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    SmallSet<unsigned, 8> Seen;
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (Seen.insert(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}

/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (const unsigned *Alias = TRI->getOverlaps(Reg); *Alias; ++Alias) {
    std::vector<SUnit *> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i];
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  // Retrieve the UseList to add data dependencies and update uses.
  std::vector<SUnit *> &UseList = Uses[Reg];
  if (MO.isDef()) {
    // Update DefList. Defs are pushed in the order they are visited and
    // never reordered.
    std::vector<SUnit *> &DefList = Defs[Reg];

    // Add any data dependencies.
    unsigned DataLatency = SU->Latency;
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i];
      if (UseSU == SU)
        continue;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Do this for register aliases too.
      // TODO: Perhaps we should get rid of
      //       SpecialAddressLatency and just move this into
      //       adjustSchedDependency for the targets that care about it.
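      // Worked example (the numbers are illustrative, not from any real
      // target): with DataLatency == 2 and SpecialAddressLatency == 3, a def
      // that feeds the pointer operand of a load or store below gets a
      // dependence latency of 2 + 3 = 5, encouraging the scheduler to start
      // the address computation earlier.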
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
      if (!UnitLatencies) {
        ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
      }
      UseSU->addPred(dep);
    }
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      std::vector<SUnit *> &UseList = Uses[*Alias];
      for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
        SUnit *UseSU = UseList[i];
        if (UseSU == SU)
          continue;
        const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
        if (!UnitLatencies) {
          ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
          ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
        }
        UseSU->addPred(dep);
      }
    }

    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
    if (!UnitLatencies && DefList.empty()) {
      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
      if (I != LoopRegs.Deps.end()) {
        const MachineOperand *UseMO = I->second.first;
        unsigned Count = I->second.second;
        const MachineInstr *UseMI = UseMO->getParent();
        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        // TODO: If we knew the total depth of the region here, we could
        // handle the case where the whole loop is inside the region but
        // is large enough that the isScheduleHigh trick isn't needed.
        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
            // scheduling region.
            Latency -= std::min(Latency, Count);
            // Add the artificial edge.
            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                /*Reg=*/0, /*isNormalMemory=*/false,
                                /*isMustAlias=*/false,
                                /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
            // TODO: Recursively mark data-edge predecessors as
            //       isScheduleHigh too.
            SU->isScheduleHigh = true;
          }
        }
        LoopRegs.Deps.erase(I);
      }
    }

    UseList.clear();
    if (!MO.isDead())
      DefList.clear();

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
      while (!DefList.empty() && DefList.back()->isCall)
        DefList.pop_back();
    }
    DefList.push_back(SU);
  } else {
    UseList.push_back(SU);
  }
}

/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  SUnit *&DefSU = VRegDefs[Reg];
  if (DefSU && DefSU != SU && DefSU != &ExitSU) {
    unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                DefSU->getInstr());
    DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
  }
  DefSU = SU;
}

/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Lookup this operand's reaching definition.
  assert(LIS && "vreg dependencies require LiveIntervals");
  SlotIndex UseIdx = LIS->getSlotIndexes()->getInstructionIndex(MI);
  LiveInterval *LI = &LIS->getInterval(Reg);
  VNInfo *VNI = LI->getVNInfoAt(UseIdx);
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      //
      // TODO: Handle "special" address latencies cleanly.
      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information,
        // then allow the target to perform its own adjustments.
        ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      }
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
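  // For example (vreg numbers and opcodes are purely illustrative):
  //   %vreg1<def> = COPY %vreg0      ; this SU reads %vreg0
  //   ...
  //   %vreg0<def> = ...              ; a later redefinition of %vreg0
  // The later def must not be moved above this use, so it receives an anti
  // edge (latency 0) with this SU as its predecessor.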
  DenseMap<unsigned, SUnit*>::const_iterator I = VRegDefs.find(Reg);
  if (I != VRegDefs.end()) {
    SUnit *DefSU = I->second;
    if (DefSU != SU)
      DefSU->addPred(SDep(SU, SDep::Anti, 0, Reg));
  }
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit
/// IDs instead of pointers.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = NewSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);
  }
}

void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  AddSchedBarrierDeps();

  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
    assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
  }

  assert(VRegDefs.size() == 0 && "Only BuildSchedGraph may access VRegDefs");

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }

    assert(!MI->isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else
          addVRegUseDeps(SU, j);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass).
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
    Defs[i].clear();
    Uses[i].clear();
  }
  VRegDefs.clear();
  PendingLoads.clear();
  MISUnitMap.clear();
}

void ScheduleDAGInstrs::FinishBlock() {
  // Nothing to do.
}

void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}

void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, then it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
  Begin = InsertPos;

  // If the first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue)
    BB->splice(InsertPos, BB, FirstDbgValue);

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    if (SUnit *SU = Sequence[i])
      BB->splice(InsertPos, BB, SU->getInstr());
    else
      // Null SUnit* is a noop.
      EmitNoop();

    // Update the Begin iterator, as the first instruction in the block
    // may have been scheduled later.
    if (i == 0)
      Begin = prior(InsertPos);
  }

  // Reinsert any remaining debug_values.
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrivMI = P.second;
    BB->splice(++OrigPrivMI, BB, DbgValue);
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
  return BB;
}
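
// Usage sketch (illustrative only; the class name and driver loop below are
// assumptions, not part of LLVM). A concrete scheduler subclasses
// ScheduleDAGInstrs, implements Schedule() to fill Sequence (typically calling
// BuildSchedGraph(AA) first), and drives each block roughly as:
//
//   // class MyPostRASched : public ScheduleDAGInstrs {
//   //   void Schedule(); // builds the DAG, then orders SUnits into Sequence
//   // };
//   // MyPostRASched Scheduler(MF, MLI, MDT, /*IsPostRAFlag=*/true, /*lis=*/0);
//   // Scheduler.StartBlock(MBB);
//   // Scheduler.Run(MBB, MBB->begin(), MBB->end(), MBB->size());
//   // Scheduler.EmitSchedule();  // splice instructions back in scheduled order
//   // Scheduler.FinishBlock();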