ScheduleDAGInstrs.cpp revision cc77b54036cfaaab9e0d9f2c731efbd2b7753cc9
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag),
    LIS(lis), UnitLatencies(false),
    Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
    LoopRegs(MLI, MDT), FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");
}

/// Run - perform scheduling.
///
void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
                            MachineBasicBlock::iterator begin,
                            MachineBasicBlock::iterator end,
                            unsigned endcount) {
  BB = bb;
  Begin = begin;
  InsertPosIndex = endcount;

  // Check to see if the scheduler cares about latencies.
  UnitLatencies = ForceUnitLatencies();

  ScheduleDAG::Run(bb, end);
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
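      // For example (illustrative only), given IR such as:
      //   %i = ptrtoint i8* %obj to i64
      //   %a = add i64 %i, 16
      //   %p = inttoptr i64 %a to i8*
      // following the first operand of the add leads back to %obj.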
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}

/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
  LoopRegs.Deps.clear();
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch())
      LoopRegs.VisitLoop(ML);
}

/// AddSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::AddSchedBarrierDeps() {
  MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
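    // For example, on most targets a return implicitly uses the physical
    // return-value register; recording ExitSU as a user of that register
    // below creates a data dependence on whatever instruction defines it.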
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(&ExitSU);
      else
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    SmallSet<unsigned, 8> Seen;
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (Seen.insert(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}

/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (const unsigned *Alias = TRI->getOverlaps(Reg); *Alias; ++Alias) {
    std::vector<SUnit *> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i];
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  // Retrieve the UseList to add data dependencies and update uses.
  std::vector<SUnit *> &UseList = Uses[Reg];
  if (MO.isDef()) {
    // Update DefList. Defs are pushed in the order they are visited and
    // never reordered.
    std::vector<SUnit *> &DefList = Defs[Reg];

    // Add any data dependencies.
    unsigned DataLatency = SU->Latency;
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i];
      if (UseSU == SU)
        continue;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Do this for register aliases too.
      // TODO: Perhaps we should get rid of
      // SpecialAddressLatency and just move this into
      // adjustSchedDependency for the targets that care about it.
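      // Illustrative example: with SpecialAddressLatency == 3, a def that
      // feeds the pointer operand of a load or store is given latency
      // DataLatency + 3 below, back-scheduling address producers.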
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
      if (!UnitLatencies) {
        ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
      }
      UseSU->addPred(dep);
    }
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      std::vector<SUnit *> &UseList = Uses[*Alias];
      for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
        SUnit *UseSU = UseList[i];
        if (UseSU == SU)
          continue;
        const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
        if (!UnitLatencies) {
          ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
          ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
        }
        UseSU->addPred(dep);
      }
    }

    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
    if (!UnitLatencies && DefList.empty()) {
      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
      if (I != LoopRegs.Deps.end()) {
        const MachineOperand *UseMO = I->second.first;
        unsigned Count = I->second.second;
        const MachineInstr *UseMI = UseMO->getParent();
        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        // TODO: If we knew the total depth of the region here, we could
        // handle the case where the whole loop is inside the region but
        // is large enough that the isScheduleHigh trick isn't needed.
        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
            // scheduling region.
            Latency -= std::min(Latency, Count);
            // Add the artificial edge.
            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                /*Reg=*/0, /*isNormalMemory=*/false,
                                /*isMustAlias=*/false,
                                /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
            // TODO: Recursively mark data-edge predecessors as
            //       isScheduleHigh too.
            SU->isScheduleHigh = true;
          }
        }
        LoopRegs.Deps.erase(I);
      }
    }

    UseList.clear();
    if (!MO.isDead())
      DefList.clear();
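
    // Clearing these lists here means instructions earlier in the block
    // (visited later in the bottom-up walk) only ever see the nearest def,
    // which keeps dependence checking from growing quadratically in block
    // size (see the note about calls below).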

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
      while (!DefList.empty() && DefList.back()->isCall)
        DefList.pop_back();
    }
    DefList.push_back(SU);
  } else {
    UseList.push_back(SU);
  }
}

/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // SSA defs do not have output/anti dependencies.
  if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  SUnit *&DefSU = VRegDefs[Reg];
  if (DefSU && DefSU != SU && DefSU != &ExitSU) {
    unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                DefSU->getInstr());
    DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
  }
  DefSU = SU;
}

/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Lookup this operand's reaching definition.
  assert(LIS && "vreg dependencies require LiveIntervals");
  SlotIndex UseIdx = LIS->getSlotIndexes()->getInstructionIndex(MI);
  LiveInterval *LI = &LIS->getInterval(Reg);
  VNInfo *VNI = LI->getVNInfoAt(UseIdx);
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      //
      // TODO: Handle "special" address latencies cleanly.
      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information,
        // then allow the target to perform its own adjustments.
        ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      }
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
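  // For example, if this use of a vreg is followed later in the region by
  // another def of the same vreg, the 0-latency Anti edge added below
  // prevents the redefinition from being scheduled above this use.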
  DenseMap<unsigned, SUnit*>::const_iterator I = VRegDefs.find(Reg);
  if (I != VRegDefs.end()) {
    SUnit *DefSU = I->second;
    if (DefSU != SU)
      DefSU->addPred(SDep(SU, SDep::Anti, 0, Reg));
  }
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler
/// may hang onto SUnit pointers. We may relax this in the future by using SUnit
/// IDs instead of pointers.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = NewSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);
  }
}

void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  AddSchedBarrierDeps();

  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
    assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
  }

  assert(VRegDefs.size() == 0 && "Only BuildSchedGraph may access VRegDefs");

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }

    assert(!MI->isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
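    // For example (illustrative), for "%r0<def> = ADD %r0, %r1" each
    // physical-register operand is handled by addPhysRegDeps below, while
    // virtual-register operands are dispatched to addVRegDefDeps or
    // addVRegUseDeps depending on whether they are defs or uses.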
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else
          addVRegUseDeps(SU, j);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass).
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
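        // For example, two stores to the same identified object (say, the
        // same FixedStack slot) are ordered by the isNormalMemory edge
        // added below instead of falling back to the coarse alias chain.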
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
    Defs[i].clear();
    Uses[i].clear();
  }
  VRegDefs.clear();
  PendingLoads.clear();
  MISUnitMap.clear();
}

void ScheduleDAGInstrs::FinishBlock() {
  // Nothing to do.
}
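
/// ComputeLatency - Assign SU->Latency. Without an instruction itinerary a
/// simple heuristic is used below: unit latency, plus two extra cycles for
/// loads; otherwise the latency comes from the target's getInstrLatency hook.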
void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}

void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, then it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
  Begin = InsertPos;

  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue)
    BB->splice(InsertPos, BB, FirstDbgValue);

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    if (SUnit *SU = Sequence[i])
      BB->splice(InsertPos, BB, SU->getInstr());
    else
      // Null SUnit* is a noop.
      EmitNoop();

    // Update the Begin iterator, as the first instruction in the block
    // may have been scheduled later.
    if (i == 0)
      Begin = prior(InsertPos);
  }

  // Reinsert any remaining debug_values.
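  // Each DbgValues entry pairs a DBG_VALUE (first) with the instruction it
  // originally followed (second); splice each DBG_VALUE back in immediately
  // after that instruction's scheduled position, iterating in reverse.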
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrivMI = P.second;
    BB->splice(++OrigPrivMI, BB, DbgValue);
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
  return BB;
}