ScheduleDAGSDNodes.cpp revision 3eb4319313b3fb9189cd4be5b3e5375be9bdc2f9
1//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements the ScheduleDAG class, which is a base class used by 11// scheduling implementation classes. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "pre-RA-sched" 16#include "SDNodeDbgValue.h" 17#include "ScheduleDAGSDNodes.h" 18#include "InstrEmitter.h" 19#include "llvm/CodeGen/SelectionDAG.h" 20#include "llvm/Target/TargetMachine.h" 21#include "llvm/Target/TargetInstrInfo.h" 22#include "llvm/Target/TargetLowering.h" 23#include "llvm/Target/TargetRegisterInfo.h" 24#include "llvm/Target/TargetSubtarget.h" 25#include "llvm/ADT/DenseMap.h" 26#include "llvm/ADT/SmallPtrSet.h" 27#include "llvm/ADT/SmallSet.h" 28#include "llvm/ADT/SmallVector.h" 29#include "llvm/ADT/Statistic.h" 30#include "llvm/Support/CommandLine.h" 31#include "llvm/Support/Debug.h" 32#include "llvm/Support/raw_ostream.h" 33using namespace llvm; 34 35STATISTIC(LoadsClustered, "Number of loads clustered together"); 36 37// This allows latency based scheduler to notice high latency instructions 38// without a target itinerary. The choise if number here has more to do with 39// balancing scheduler heursitics than with the actual machine latency. 40static cl::opt<int> HighLatencyCycles( 41 "sched-high-latency-cycles", cl::Hidden, cl::init(10), 42 cl::desc("Roughly estimate the number of cycles that 'long latency'" 43 "instructions take for targets with no itinerary")); 44 45ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) 46 : ScheduleDAG(mf), 47 InstrItins(mf.getTarget().getInstrItineraryData()) {} 48 49/// Run - perform scheduling. 
///
void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
                             MachineBasicBlock::iterator insertPos) {
  // Remember the DAG being scheduled, then defer to the base-class driver,
  // which calls back into BuildSchedGraph/Schedule/EmitSchedule.
  DAG = dag;
  ScheduleDAG::Run(bb, insertPos);
}

/// NewSUnit - Creates a new SUnit and return a ptr to it.
///
SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
#ifndef NDEBUG
  // Capture the vector's base address so the assert below can verify that
  // push_back never reallocated (outstanding SUnit* would be invalidated).
  const SUnit *Addr = 0;
  if (!SUnits.empty())
    Addr = &SUnits[0];
#endif
  SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
  assert((Addr == 0 || Addr == &SUnits[0]) &&
         "SUnits std::vector reallocated on the fly!");
  SUnits.back().OrigNode = &SUnits.back();
  SUnit *SU = &SUnits.back();
  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
  // IMPLICIT_DEF (and a null node) carries no real scheduling preference.
  if (!N ||
      (N->isMachineOpcode() &&
       N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
    SU->SchedulingPref = Sched::None;
  else
    SU->SchedulingPref = TLI.getSchedulingPreference(N);
  return SU;
}

// Clone - Duplicate an SUnit (used when the scheduler must re-issue a node).
// The clone shares the original's OrigNode and copies the cached per-node
// flags; the original is marked isCloned.
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
  SUnit *SU = NewSUnit(Old->getNode());
  SU->OrigNode = Old->OrigNode;
  SU->Latency = Old->Latency;
  SU->isVRegCycle = Old->isVRegCycle;
  SU->isCall = Old->isCall;
  SU->isTwoAddress = Old->isTwoAddress;
  SU->isCommutable = Old->isCommutable;
  SU->hasPhysRegDefs = Old->hasPhysRegDefs;
  SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
  SU->SchedulingPref = Old->SchedulingPref;
  Old->isCloned = true;
  return SU;
}

/// CheckForPhysRegDependency - Check if the dependency between def and use of
/// a specified operand is a physical register dependency. If so, returns the
/// register and the cost of copying the register.
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
                                      const TargetRegisterInfo *TRI,
                                      const TargetInstrInfo *TII,
                                      unsigned &PhysReg, int &Cost) {
  // Only a CopyToReg's value operand (operand 2) can create a physreg dep.
  if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
    return;

  unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
  if (TargetRegisterInfo::isVirtualRegister(Reg))
    return;

  unsigned ResNo = User->getOperand(2).getResNo();
  if (Def->isMachineOpcode()) {
    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
    // The copied value must be one of Def's implicit defs (results past the
    // explicit defs map onto the ImplicitDefs list).
    if (ResNo >= II.getNumDefs() &&
        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
      PhysReg = Reg;
      const TargetRegisterClass *RC =
        TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
      Cost = RC->getCopyCost();
    }
  }
}

// AddGlue - Morph node N so that it takes Glue as an extra (last) operand,
// and, when the AddGlue flag is set, also produces an MVT::Glue result.
// NOTE(review): the bool parameter shadows the function name; consider
// renaming it in a follow-up.
static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
  SmallVector<EVT, 4> VTs;
  SDNode *GlueDestNode = Glue.getNode();

  // Don't add glue from a node to itself.
  if (GlueDestNode == N) return;

  // Don't add glue to something which already has glue.
  if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;

  for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
    VTs.push_back(N->getValueType(I));

  if (AddGlue)
    VTs.push_back(MVT::Glue);

  SmallVector<SDValue, 4> Ops;
  for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
    Ops.push_back(N->getOperand(I));

  if (GlueDestNode)
    Ops.push_back(Glue);

  SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
  MachineSDNode::mmo_iterator Begin = 0, End = 0;
  MachineSDNode *MN = dyn_cast<MachineSDNode>(N);

  // Store memory references: MorphNodeTo below replaces the node's operand
  // and result lists, so save the memoperands first and restore them after.
  if (MN) {
    Begin = MN->memoperands_begin();
    End = MN->memoperands_end();
  }

  DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());

  // Reset the memory references.
  if (MN)
    MN->setMemRefs(Begin, End);
}

/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
/// This function finds loads of the same base and different offsets. If the
/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
  SDNode *Chain = 0;
  unsigned NumOps = Node->getNumOperands();
  // A load's chain, when present, is its last operand.
  if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
    Chain = Node->getOperand(NumOps-1).getNode();
  if (!Chain)
    return;

  // Look for other loads of the same chain. Find loads that are loading from
  // the same base pointer and different offsets.
  SmallPtrSet<SDNode*, 16> Visited;
  SmallVector<int64_t, 4> Offsets;
  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
  bool Cluster = false;
  SDNode *Base = Node;
  for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
       I != E; ++I) {
    SDNode *User = *I;
    if (User == Node || !Visited.insert(User))
      continue;
    int64_t Offset1, Offset2;
    if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
        Offset1 == Offset2)
      // FIXME: Should be ok if the addresses are identical. But earlier
      // optimizations really should have eliminated one of the loads.
      continue;
    if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
      Offsets.push_back(Offset1);
    O2SMap.insert(std::make_pair(Offset2, User));
    Offsets.push_back(Offset2);
    // Track the lowest-offset load as the cluster's base.
    if (Offset2 < Offset1)
      Base = User;
    Cluster = true;
  }

  if (!Cluster)
    return;

  // Sort them in increasing order.
  std::sort(Offsets.begin(), Offsets.end());

  // Check if the loads are close enough.
  SmallVector<SDNode*, 4> Loads;
  unsigned NumLoads = 0;
  int64_t BaseOff = Offsets[0];
  SDNode *BaseLoad = O2SMap[BaseOff];
  Loads.push_back(BaseLoad);
  for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
    int64_t Offset = Offsets[i];
    SDNode *Load = O2SMap[Offset];
    if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
      break; // Stop right here. Ignore loads that are further away.
    Loads.push_back(Load);
    ++NumLoads;
  }

  if (NumLoads == 0)
    return;

  // Cluster loads by adding MVT::Glue outputs and inputs. This also
  // ensures they are scheduled in order of increasing addresses.
  SDNode *Lead = Loads[0];
  AddGlue(Lead, SDValue(0, 0), true, DAG);

  // Thread a glue chain through the loads: each load consumes the previous
  // load's glue result and (except for the last) produces one of its own.
  SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
  for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
    bool OutGlue = I < E - 1;
    SDNode *Load = Loads[I];

    AddGlue(Load, InGlue, OutGlue, DAG);

    if (OutGlue)
      InGlue = SDValue(Load, Load->getNumValues() - 1);

    ++LoadsClustered;
  }
}

/// ClusterNodes - Cluster certain nodes which should be scheduled together.
///
void ScheduleDAGSDNodes::ClusterNodes() {
  // Currently only loads are clustered; walk every machine node and let
  // ClusterNeighboringLoads decide whether its neighbors qualify.
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    SDNode *Node = &*NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    if (TID.mayLoad())
      // Cluster loads from "near" addresses into combined SUnits.
      ClusterNeighboringLoads(Node);
  }
}

// BuildSchedUnits - Create one SUnit per schedulable node (or per group of
// glued-together nodes), recording the mapping in each SDNode's NodeId.
void ScheduleDAGSDNodes::BuildSchedUnits() {
  // During scheduling, the NodeId field of SDNode is used to map SDNodes
  // to their associated SUnits by holding SUnits table indices. A value
  // of -1 means the SDNode does not yet have an associated SUnit.
  unsigned NumNodes = 0;
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    NI->setNodeId(-1);
    ++NumNodes;
  }

  // Reserve entries in the vector for each of the SUnits we are creating. This
  // ensures that reallocation of the vector won't happen, so SUnit*'s won't
  // get invalidated.
  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  // This is a temporary workaround.
  SUnits.reserve(NumNodes * 2);

  // Add all nodes in depth first order.
  SmallVector<SDNode*, 64> Worklist;
  SmallPtrSet<SDNode*, 64> Visited;
  Worklist.push_back(DAG->getRoot().getNode());
  Visited.insert(DAG->getRoot().getNode());

  while (!Worklist.empty()) {
    SDNode *NI = Worklist.pop_back_val();

    // Add all operands to the worklist unless they've already been added.
    for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
      if (Visited.insert(NI->getOperand(i).getNode()))
        Worklist.push_back(NI->getOperand(i).getNode());

    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
      continue;

    // If this node has already been processed, stop now.
    if (NI->getNodeId() != -1) continue;

    SUnit *NodeSUnit = NewSUnit(NI);

    // See if anything is glued to this node, if so, add them to glued
    // nodes. Nodes can have at most one glue input and one glue output. Glue
    // is required to be the last operand and result of a node.

    // Scan up to find glued preds.
    SDNode *N = NI;
    while (N->getNumOperands() &&
           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
      N = N->getOperand(N->getNumOperands()-1).getNode();
      assert(N->getNodeId() == -1 && "Node already inserted!");
      N->setNodeId(NodeSUnit->NodeNum);
      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
        NodeSUnit->isCall = true;
    }

    // Scan down to find any glued succs.
    N = NI;
    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
      SDValue GlueVal(N, N->getNumValues()-1);

      // There are either zero or one users of the Glue result.
      bool HasGlueUse = false;
      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
           UI != E; ++UI)
        if (GlueVal.isOperandOf(*UI)) {
          HasGlueUse = true;
          assert(N->getNodeId() == -1 && "Node already inserted!");
          N->setNodeId(NodeSUnit->NodeNum);
          N = *UI;
          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
            NodeSUnit->isCall = true;
          break;
        }
      if (!HasGlueUse) break;
    }

    // If there are glue operands involved, N is now the bottom-most node
    // of the sequence of nodes that are glued together.
    // Update the SUnit.
    NodeSUnit->setNode(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NodeSUnit->NodeNum);

    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
    InitNumRegDefsLeft(NodeSUnit);

    // Assign the Latency field of NodeSUnit using target-provided information.
    ComputeLatency(NodeSUnit);
  }
}

// AddSchedEdges - Add dependence edges (data, chain, physreg) between the
// SUnits created by BuildSchedUnits, and set per-SUnit flags derived from
// the instruction descriptions.
void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Pass 2: add the preds, succs, etc.
  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
    SUnit *SU = &SUnits[su];
    SDNode *MainNode = SU->getNode();

    if (MainNode->isMachineOpcode()) {
      unsigned Opc = MainNode->getMachineOpcode();
      const TargetInstrDesc &TID = TII->get(Opc);
      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
          SU->isTwoAddress = true;
          break;
        }
      }
      if (TID.isCommutable())
        SU->isCommutable = true;
    }

    // Find all predecessors and successors of the group.
    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
      if (N->isMachineOpcode() &&
          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
        SU->hasPhysRegClobbers = true;
        unsigned NumUsed = InstrEmitter::CountResults(N);
        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
          --NumUsed;    // Skip over unused values at the end.
        // A used result beyond the explicit defs is an implicit (physreg) def.
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
          SU->hasPhysRegDefs = true;
      }

      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        SDNode *OpN = N->getOperand(i).getNode();
        if (isPassiveNode(OpN)) continue;   // Not scheduled.
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == SU) continue;           // In the same group.

        EVT OpVT = N->getOperand(i).getValueType();
        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
        bool isChain = OpVT == MVT::Other;

        unsigned PhysReg = 0;
        int Cost = 1;
        // Determine if this is a physical register dependency.
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
        assert((PhysReg == 0 || !isChain) &&
               "Chain dependence via physreg data?");
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
        // emits a copy from the physical register to a virtual register unless
        // it requires a cross class copy (cost < 0). That means we are only
        // treating "expensive to copy" register dependency as physical register
        // dependency. This may change in the future though.
        if (Cost >= 0)
          PhysReg = 0;

        // If this is a ctrl dep, latency is 1.
        // Special-case TokenFactor chains as zero-latency.
        unsigned OpLatency = 1;
        if (!isChain && OpSU->Latency > 0)
          OpLatency = OpSU->Latency;
        else if(isChain && OpN->getOpcode() == ISD::TokenFactor)
          OpLatency = 0;

        const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                               OpLatency, PhysReg);
        if (!isChain && !UnitLatencies) {
          ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
          ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
        }

        // addPred returns false when an equivalent edge already existed.
        if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
          // Multiple register uses are combined in the same SUnit. For example,
          // we could have a set of glued nodes with all their defs consumed by
          // another set of glued nodes. Register pressure tracking sees this as
          // a single use, so to keep pressure balanced we reduce the defs.
          //
          // We can't tell (without more book-keeping) if this results from
          // glued nodes or duplicate operands. As long as we don't reduce
          // NumRegDefsLeft to zero, we handle the common cases well.
          --OpSU->NumRegDefsLeft;
        }
      }
    }
  }
}

/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input.
/// This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// glued together nodes with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
  // Note: AA is currently unused here; it is part of the common
  // BuildSchedGraph interface.
  // Cluster certain nodes which should be scheduled together.
  ClusterNodes();
  // Populate the SUnits array.
  BuildSchedUnits();
  // Compute all the scheduling dependencies between nodes.
  AddSchedEdges();
}

// Initialize NumNodeDefs for the current Node's opcode.
void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
  // Check for phys reg copy.
  if (!Node)
    return;

  if (!Node->isMachineOpcode()) {
    // Of the non-machine nodes, only CopyFromReg produces a register value.
    if (Node->getOpcode() == ISD::CopyFromReg)
      NodeNumDefs = 1;
    else
      NodeNumDefs = 0;
    return;
  }
  unsigned POpc = Node->getMachineOpcode();
  if (POpc == TargetOpcode::IMPLICIT_DEF) {
    // No register need be allocated for this.
    NodeNumDefs = 0;
    return;
  }
  unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
  // Some instructions define regs that are not represented in the selection DAG
  // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
  NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
  DefIdx = 0;
}

// Construct a RegDefIter for this SUnit and find the first valid value.
ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
                                           const ScheduleDAGSDNodes *SD)
  : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
  InitNodeNumDefs();
  Advance();
}

// Advance to the next valid value defined by the SUnit.
void ScheduleDAGSDNodes::RegDefIter::Advance() {
  for (;Node;) { // Visit all glued nodes.
    for (;DefIdx < NodeNumDefs; ++DefIdx) {
      // Skip defs that nothing reads.
      if (!Node->hasAnyUseOfValue(DefIdx))
        continue;
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) {
        // Propagate the incoming (full-register) type. I doubt it's needed.
        ValueType = Node->getOperand(0).getValueType();
      }
      else {
        ValueType = Node->getValueType(DefIdx);
      }
      ++DefIdx;
      return; // Found a normal regdef.
    }
    Node = Node->getGluedNode();
    if (Node == NULL) {
      return; // No values left to visit.
    }
    InitNodeNumDefs();
  }
}

// InitNumRegDefsLeft - Count the register defs of this SUnit (across all
// glued nodes) so register-pressure tracking can decrement them as uses
// are scheduled.
void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
  assert(SU->NumRegDefsLeft == 0 && "expect a new node");
  for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
    assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
    ++SU->NumRegDefsLeft;
  }
}

// ComputeLatency - Assign SU->Latency from the target itinerary when one is
// available, otherwise fall back to unit latency (or HighLatencyCycles for
// known high-latency defs).
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
  // Check to see if the scheduler cares about latencies.
  if (ForceUnitLatencies()) {
    SU->Latency = 1;
    return;
  }

  if (!InstrItins || InstrItins->isEmpty()) {
    SDNode *N = SU->getNode();
    if (N && N->isMachineOpcode() &&
        TII->isHighLatencyDef(N->getMachineOpcode()))
      SU->Latency = HighLatencyCycles;
    else
      SU->Latency = 1;
    return;
  }

  // Compute the latency for the node. We use the sum of the latencies for
  // all nodes glued together into this SUnit.
  SU->Latency = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
    if (N->isMachineOpcode())
      SU->Latency += TII->getInstrLatency(InstrItins, N);
}

// ComputeOperandLatency - Refine the latency of the edge feeding operand
// OpIdx of Use from Def, using per-operand itinerary data.
void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
                                               unsigned OpIdx, SDep& dep) const{
  // Check to see if the scheduler cares about latencies.
  if (ForceUnitLatencies())
    return;

  if (dep.getKind() != SDep::Data)
    return;

  unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
  if (Use->isMachineOpcode())
    // Adjust the use operand index by num of defs.
    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
  int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
  if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
      !BB->succ_empty()) {
    unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      // This copy is a liveout value. It is likely coalesced, so reduce the
      // latency so not to penalize the def.
      // FIXME: need target specific adjustment here?
      Latency = (Latency > 1) ? Latency - 1 : 1;
  }
  if (Latency >= 0)
    dep.setLatency(Latency);
}

// dumpNode - Debug dump of an SUnit's node and any glued nodes.
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
  // A null node means this SUnit was created for a physical register copy.
  if (!SU->getNode()) {
    dbgs() << "PHYS REG COPY\n";
    return;
  }

  SU->getNode()->dump(DAG);
  dbgs() << "\n";
  SmallVector<SDNode *, 4> GluedNodes;
  for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
    GluedNodes.push_back(N);
  while (!GluedNodes.empty()) {
    dbgs() << "    ";
    GluedNodes.back()->dump(DAG);
    dbgs() << "\n";
    GluedNodes.pop_back();
  }
}

namespace {
  // OrderSorter - Comparator ordering (source-order, MachineInstr*) pairs by
  // ascending source order number.
  struct OrderSorter {
    bool operator()(const std::pair<unsigned, MachineInstr*> &A,
                    const std::pair<unsigned, MachineInstr*> &B) {
      return A.first < B.first;
    }
  };
}

/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
                               InstrEmitter &Emitter,
                    SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
                               DenseMap<SDValue, unsigned> &VRBaseMap,
                               unsigned Order) {
  if (!N->getHasDebugValue())
    return;

  // Opportunistically insert immediate dbg_value uses, i.e. those with source
  // order number right after the N.
  MachineBasicBlock *BB = Emitter.getBlock();
  MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
  SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
  for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
    if (DVs[i]->isInvalidated())
      continue;
    unsigned DVOrder = DVs[i]->getOrder();
    // Order == 0 means "emit all"; otherwise only emit the dbg_value that
    // immediately follows N in source order.
    if (!Order || DVOrder == ++Order) {
      MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
      if (DbgMI) {
        Orders.push_back(std::make_pair(DVOrder, DbgMI));
        BB->insert(InsertPos, DbgMI);
      }
      DVs[i]->setIsInvalidated();
    }
  }
}

// ProcessSourceNode - Process nodes with source order numbers. These are added
// to a vector which EmitSchedule uses to determine how to insert dbg_value
// instructions in the right order.
static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
                              InstrEmitter &Emitter,
                              DenseMap<SDValue, unsigned> &VRBaseMap,
                    SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
                              SmallSet<unsigned, 8> &Seen) {
  unsigned Order = DAG->GetOrdering(N);
  if (!Order || !Seen.insert(Order)) {
    // Process any valid SDDbgValues even if node does not have any order
    // assigned.
    ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
    return;
  }

  MachineBasicBlock *BB = Emitter.getBlock();
  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
    // Did not insert any instruction.
    Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
    return;
  }

  Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
  ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}


/// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
  InstrEmitter Emitter(BB, InsertPos);
  DenseMap<SDValue, unsigned> VRBaseMap;
  DenseMap<SUnit*, unsigned> CopyVRBaseMap;
  SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
  SmallSet<unsigned, 8> Seen;
  bool HasDbg = DAG->hasDebugValues();

  // If this is the first BB, emit byval parameter dbg_value's.
  if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
    SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
    SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
    for (; PDI != PDE; ++PDI) {
      MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
      if (DbgMI)
        BB->insert(InsertPos, DbgMI);
    }
  }

  // Emit each scheduled SUnit in sequence order.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    SUnit *SU = Sequence[i];
    if (!SU) {
      // Null SUnit* is a noop.
      EmitNoop();
      continue;
    }

    // For pre-regalloc scheduling, create instructions corresponding to the
    // SDNode and any glued SDNodes and append them to the block.
    if (!SU->getNode()) {
      // Emit a copy.
      EmitPhysRegCopy(SU, CopyVRBaseMap);
      continue;
    }

    // Emit glued predecessors first (collected bottom-up, emitted in
    // top-down order by popping from the back), then the main node.
    SmallVector<SDNode *, 4> GluedNodes;
    for (SDNode *N = SU->getNode()->getGluedNode(); N;
         N = N->getGluedNode())
      GluedNodes.push_back(N);
    while (!GluedNodes.empty()) {
      SDNode *N = GluedNodes.back();
      Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
                       VRBaseMap);
      // Remember the source order of the inserted instruction.
      if (HasDbg)
        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
      GluedNodes.pop_back();
    }
    Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
                     VRBaseMap);
    // Remember the source order of the inserted instruction.
    if (HasDbg)
      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
                        Seen);
  }

  // Insert all the dbg_values which have not already been inserted in source
  // order sequence.
  if (HasDbg) {
    MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();

    // Sort the source order instructions and use the order to insert debug
    // values.
    std::sort(Orders.begin(), Orders.end(), OrderSorter());

    SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
    SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
    // Now emit the rest according to source order.
    unsigned LastOrder = 0;
    for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
      unsigned Order = Orders[i].first;
      MachineInstr *MI = Orders[i].second;
      // Insert all SDDbgValue's whose order(s) are before "Order".
      if (!MI)
        continue;
      for (; DI != DE &&
             (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
        if ((*DI)->isInvalidated())
          continue;
        MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
        if (DbgMI) {
          if (!LastOrder)
            // Insert to start of the BB (after PHIs).
            BB->insert(BBBegin, DbgMI);
          else {
            // Insert at the instruction, which may be in a different
            // block, if the block was split by a custom inserter.
            MachineBasicBlock::iterator Pos = MI;
            MI->getParent()->insert(llvm::next(Pos), DbgMI);
          }
        }
      }
      LastOrder = Order;
    }
    // Add trailing DbgValue's before the terminator. FIXME: May want to add
    // some of them before one or more conditional branches?
    while (DI != DE) {
      MachineBasicBlock *InsertBB = Emitter.getBlock();
      MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
      if (!(*DI)->isInvalidated()) {
        MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
        if (DbgMI)
          InsertBB->insert(Pos, DbgMI);
      }
      ++DI;
    }
  }

  // A custom inserter may have moved emission to a different block; report
  // the emitter's final block and insertion point back to the caller.
  BB = Emitter.getBlock();
  InsertPos = Emitter.getInsertPos();
  return BB;
}