ScheduleDAGSDNodes.cpp revision a8efe28a44996978faa42a387f1a6087a7b942c7
1//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements the ScheduleDAG class, which is a base class used by 11// scheduling implementation classes. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "pre-RA-sched" 16#include "SDNodeDbgValue.h" 17#include "ScheduleDAGSDNodes.h" 18#include "InstrEmitter.h" 19#include "llvm/CodeGen/SelectionDAG.h" 20#include "llvm/Target/TargetMachine.h" 21#include "llvm/Target/TargetInstrInfo.h" 22#include "llvm/Target/TargetRegisterInfo.h" 23#include "llvm/Target/TargetSubtarget.h" 24#include "llvm/ADT/DenseMap.h" 25#include "llvm/ADT/SmallPtrSet.h" 26#include "llvm/ADT/SmallVector.h" 27#include "llvm/ADT/Statistic.h" 28#include "llvm/Support/Debug.h" 29#include "llvm/Support/raw_ostream.h" 30using namespace llvm; 31 32STATISTIC(LoadsClustered, "Number of loads clustered together"); 33 34ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) 35 : ScheduleDAG(mf) { 36} 37 38/// Run - perform scheduling. 39/// 40void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, 41 MachineBasicBlock::iterator insertPos) { 42 DAG = dag; 43 ScheduleDAG::Run(bb, insertPos); 44} 45 46SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { 47 SUnit *SU = NewSUnit(Old->getNode()); 48 SU->OrigNode = Old->OrigNode; 49 SU->Latency = Old->Latency; 50 SU->isTwoAddress = Old->isTwoAddress; 51 SU->isCommutable = Old->isCommutable; 52 SU->hasPhysRegDefs = Old->hasPhysRegDefs; 53 SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; 54 Old->isCloned = true; 55 return SU; 56} 57 58/// CheckForPhysRegDependency - Check if the dependency between def and use of 59/// a specified operand is a physical register dependency. If so, returns the 60/// register and the cost of copying the register. 61static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, 62 const TargetRegisterInfo *TRI, 63 const TargetInstrInfo *TII, 64 unsigned &PhysReg, int &Cost) { 65 if (Op != 2 || User->getOpcode() != ISD::CopyToReg) 66 return; 67 68 unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); 69 if (TargetRegisterInfo::isVirtualRegister(Reg)) 70 return; 71 72 unsigned ResNo = User->getOperand(2).getResNo(); 73 if (Def->isMachineOpcode()) { 74 const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); 75 if (ResNo >= II.getNumDefs() && 76 II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { 77 PhysReg = Reg; 78 const TargetRegisterClass *RC = 79 TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); 80 Cost = RC->getCopyCost(); 81 } 82 } 83} 84 85static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, 86 SelectionDAG *DAG) { 87 SmallVector<EVT, 4> VTs; 88 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) 89 VTs.push_back(N->getValueType(i)); 90 if (AddFlag) 91 VTs.push_back(MVT::Flag); 92 SmallVector<SDValue, 4> Ops; 93 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 94 Ops.push_back(N->getOperand(i)); 95 if (Flag.getNode()) 96 Ops.push_back(Flag); 97 SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); 98 DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); 99} 100 101/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them. 102/// This function finds loads of the same base and different offsets. If the 103/// offsets are not far apart (target specific), it add MVT::Flag inputs and 104/// outputs to ensure they are scheduled together and in order. This 105/// optimization may benefit some targets by improving cache locality. 106void ScheduleDAGSDNodes::ClusterNeighboringLoads() { 107 SmallPtrSet<SDNode*, 16> Visited; 108 SmallVector<int64_t, 4> Offsets; 109 DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode. 110 for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), 111 E = DAG->allnodes_end(); NI != E; ++NI) { 112 SDNode *Node = &*NI; 113 if (!Node || !Node->isMachineOpcode()) 114 continue; 115 116 unsigned Opc = Node->getMachineOpcode(); 117 const TargetInstrDesc &TID = TII->get(Opc); 118 if (!TID.mayLoad()) 119 continue; 120 121 SDNode *Chain = 0; 122 unsigned NumOps = Node->getNumOperands(); 123 if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) 124 Chain = Node->getOperand(NumOps-1).getNode(); 125 if (!Chain) 126 continue; 127 128 // Look for other loads of the same chain. Find loads that are loading from 129 // the same base pointer and different offsets. 130 Visited.clear(); 131 Offsets.clear(); 132 O2SMap.clear(); 133 bool Cluster = false; 134 SDNode *Base = Node; 135 int64_t BaseOffset; 136 for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); 137 I != E; ++I) { 138 SDNode *User = *I; 139 if (User == Node || !Visited.insert(User)) 140 continue; 141 int64_t Offset1, Offset2; 142 if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || 143 Offset1 == Offset2) 144 // FIXME: Should be ok if they addresses are identical. But earlier 145 // optimizations really should have eliminated one of the loads. 146 continue; 147 if (O2SMap.insert(std::make_pair(Offset1, Base)).second) 148 Offsets.push_back(Offset1); 149 O2SMap.insert(std::make_pair(Offset2, User)); 150 Offsets.push_back(Offset2); 151 if (Offset2 < Offset1) { 152 Base = User; 153 BaseOffset = Offset2; 154 } else { 155 BaseOffset = Offset1; 156 } 157 Cluster = true; 158 } 159 160 if (!Cluster) 161 continue; 162 163 // Sort them in increasing order. 164 std::sort(Offsets.begin(), Offsets.end()); 165 166 // Check if the loads are close enough. 167 SmallVector<SDNode*, 4> Loads; 168 unsigned NumLoads = 0; 169 int64_t BaseOff = Offsets[0]; 170 SDNode *BaseLoad = O2SMap[BaseOff]; 171 Loads.push_back(BaseLoad); 172 for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { 173 int64_t Offset = Offsets[i]; 174 SDNode *Load = O2SMap[Offset]; 175 if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, 176 NumLoads)) 177 break; // Stop right here. Ignore loads that are further away. 178 Loads.push_back(Load); 179 ++NumLoads; 180 } 181 182 if (NumLoads == 0) 183 continue; 184 185 // Cluster loads by adding MVT::Flag outputs and inputs. This also 186 // ensure they are scheduled in order of increasing addresses. 187 SDNode *Lead = Loads[0]; 188 AddFlags(Lead, SDValue(0,0), true, DAG); 189 SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1); 190 for (unsigned i = 1, e = Loads.size(); i != e; ++i) { 191 bool OutFlag = i < e-1; 192 SDNode *Load = Loads[i]; 193 AddFlags(Load, InFlag, OutFlag, DAG); 194 if (OutFlag) 195 InFlag = SDValue(Load, Load->getNumValues()-1); 196 ++LoadsClustered; 197 } 198 } 199} 200 201void ScheduleDAGSDNodes::BuildSchedUnits() { 202 // During scheduling, the NodeId field of SDNode is used to map SDNodes 203 // to their associated SUnits by holding SUnits table indices. A value 204 // of -1 means the SDNode does not yet have an associated SUnit. 205 unsigned NumNodes = 0; 206 for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), 207 E = DAG->allnodes_end(); NI != E; ++NI) { 208 NI->setNodeId(-1); 209 ++NumNodes; 210 } 211 212 // Reserve entries in the vector for each of the SUnits we are creating. This 213 // ensure that reallocation of the vector won't happen, so SUnit*'s won't get 214 // invalidated. 215 // FIXME: Multiply by 2 because we may clone nodes during scheduling. 216 // This is a temporary workaround. 217 SUnits.reserve(NumNodes * 2); 218 219 // Check to see if the scheduler cares about latencies. 220 bool UnitLatencies = ForceUnitLatencies(); 221 222 // Add all nodes in depth first order. 223 SmallVector<SDNode*, 64> Worklist; 224 SmallPtrSet<SDNode*, 64> Visited; 225 Worklist.push_back(DAG->getRoot().getNode()); 226 Visited.insert(DAG->getRoot().getNode()); 227 228 while (!Worklist.empty()) { 229 SDNode *NI = Worklist.pop_back_val(); 230 231 // Add all operands to the worklist unless they've already been added. 232 for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) 233 if (Visited.insert(NI->getOperand(i).getNode())) 234 Worklist.push_back(NI->getOperand(i).getNode()); 235 236 if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. 237 continue; 238 239 // If this node has already been processed, stop now. 240 if (NI->getNodeId() != -1) continue; 241 242 SUnit *NodeSUnit = NewSUnit(NI); 243 244 // See if anything is flagged to this node, if so, add them to flagged 245 // nodes. Nodes can have at most one flag input and one flag output. Flags 246 // are required to be the last operand and result of a node. 247 248 // Scan up to find flagged preds. 249 SDNode *N = NI; 250 while (N->getNumOperands() && 251 N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { 252 N = N->getOperand(N->getNumOperands()-1).getNode(); 253 assert(N->getNodeId() == -1 && "Node already inserted!"); 254 N->setNodeId(NodeSUnit->NodeNum); 255 } 256 257 // Scan down to find any flagged succs. 258 N = NI; 259 while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { 260 SDValue FlagVal(N, N->getNumValues()-1); 261 262 // There are either zero or one users of the Flag result. 263 bool HasFlagUse = false; 264 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); 265 UI != E; ++UI) 266 if (FlagVal.isOperandOf(*UI)) { 267 HasFlagUse = true; 268 assert(N->getNodeId() == -1 && "Node already inserted!"); 269 N->setNodeId(NodeSUnit->NodeNum); 270 N = *UI; 271 break; 272 } 273 if (!HasFlagUse) break; 274 } 275 276 // If there are flag operands involved, N is now the bottom-most node 277 // of the sequence of nodes that are flagged together. 278 // Update the SUnit. 279 NodeSUnit->setNode(N); 280 assert(N->getNodeId() == -1 && "Node already inserted!"); 281 N->setNodeId(NodeSUnit->NodeNum); 282 283 // Assign the Latency field of NodeSUnit using target-provided information. 284 if (UnitLatencies) 285 NodeSUnit->Latency = 1; 286 else 287 ComputeLatency(NodeSUnit); 288 } 289} 290 291void ScheduleDAGSDNodes::AddSchedEdges() { 292 const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); 293 294 // Check to see if the scheduler cares about latencies. 295 bool UnitLatencies = ForceUnitLatencies(); 296 297 // Pass 2: add the preds, succs, etc. 298 for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { 299 SUnit *SU = &SUnits[su]; 300 SDNode *MainNode = SU->getNode(); 301 302 if (MainNode->isMachineOpcode()) { 303 unsigned Opc = MainNode->getMachineOpcode(); 304 const TargetInstrDesc &TID = TII->get(Opc); 305 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 306 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 307 SU->isTwoAddress = true; 308 break; 309 } 310 } 311 if (TID.isCommutable()) 312 SU->isCommutable = true; 313 } 314 315 // Find all predecessors and successors of the group. 316 for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) { 317 if (N->isMachineOpcode() && 318 TII->get(N->getMachineOpcode()).getImplicitDefs()) { 319 SU->hasPhysRegClobbers = true; 320 unsigned NumUsed = InstrEmitter::CountResults(N); 321 while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) 322 --NumUsed; // Skip over unused values at the end. 323 if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) 324 SU->hasPhysRegDefs = true; 325 } 326 327 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 328 SDNode *OpN = N->getOperand(i).getNode(); 329 if (isPassiveNode(OpN)) continue; // Not scheduled. 330 SUnit *OpSU = &SUnits[OpN->getNodeId()]; 331 assert(OpSU && "Node has no SUnit!"); 332 if (OpSU == SU) continue; // In the same group. 333 334 EVT OpVT = N->getOperand(i).getValueType(); 335 assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); 336 bool isChain = OpVT == MVT::Other; 337 338 unsigned PhysReg = 0; 339 int Cost = 1; 340 // Determine if this is a physical register dependency. 341 CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); 342 assert((PhysReg == 0 || !isChain) && 343 "Chain dependence via physreg data?"); 344 // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler 345 // emits a copy from the physical register to a virtual register unless 346 // it requires a cross class copy (cost < 0). That means we are only 347 // treating "expensive to copy" register dependency as physical register 348 // dependency. This may change in the future though. 349 if (Cost >= 0) 350 PhysReg = 0; 351 352 const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, 353 OpSU->Latency, PhysReg); 354 if (!isChain && !UnitLatencies) { 355 ComputeOperandLatency(OpSU, SU, (SDep &)dep); 356 ST.adjustSchedDependency(OpSU, SU, (SDep &)dep); 357 } 358 359 SU->addPred(dep); 360 } 361 } 362 } 363} 364 365/// BuildSchedGraph - Build the SUnit graph from the selection dag that we 366/// are input. This SUnit graph is similar to the SelectionDAG, but 367/// excludes nodes that aren't interesting to scheduling, and represents 368/// flagged together nodes with a single SUnit. 369void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { 370 // Cluster loads from "near" addresses into combined SUnits. 371 ClusterNeighboringLoads(); 372 // Populate the SUnits array. 373 BuildSchedUnits(); 374 // Compute all the scheduling dependencies between nodes. 375 AddSchedEdges(); 376} 377 378void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { 379 const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); 380 381 // Compute the latency for the node. We use the sum of the latencies for 382 // all nodes flagged together into this SUnit. 383 SU->Latency = 0; 384 for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) 385 if (N->isMachineOpcode()) { 386 SU->Latency += InstrItins. 387 getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); 388 } 389} 390 391void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { 392 if (!SU->getNode()) { 393 dbgs() << "PHYS REG COPY\n"; 394 return; 395 } 396 397 SU->getNode()->dump(DAG); 398 dbgs() << "\n"; 399 SmallVector<SDNode *, 4> FlaggedNodes; 400 for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) 401 FlaggedNodes.push_back(N); 402 while (!FlaggedNodes.empty()) { 403 dbgs() << " "; 404 FlaggedNodes.back()->dump(DAG); 405 dbgs() << "\n"; 406 FlaggedNodes.pop_back(); 407 } 408} 409 410/// EmitSchedule - Emit the machine code in scheduled order. 411MachineBasicBlock *ScheduleDAGSDNodes:: 412EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { 413 InstrEmitter Emitter(BB, InsertPos); 414 DenseMap<SDValue, unsigned> VRBaseMap; 415 DenseMap<SUnit*, unsigned> CopyVRBaseMap; 416 417 // For now, any constant debug info nodes go at the beginning. 418 for (SDDbgInfo::ConstDbgIterator I = DAG->DbgConstBegin(), 419 E = DAG->DbgConstEnd(); I!=E; I++) { 420 Emitter.EmitDbgValue(*I, EM); 421 delete *I; 422 } 423 424 for (unsigned i = 0, e = Sequence.size(); i != e; i++) { 425 SUnit *SU = Sequence[i]; 426 if (!SU) { 427 // Null SUnit* is a noop. 428 EmitNoop(); 429 continue; 430 } 431 432 // For pre-regalloc scheduling, create instructions corresponding to the 433 // SDNode and any flagged SDNodes and append them to the block. 434 if (!SU->getNode()) { 435 // Emit a copy. 436 EmitPhysRegCopy(SU, CopyVRBaseMap); 437 continue; 438 } 439 440 SmallVector<SDNode *, 4> FlaggedNodes; 441 for (SDNode *N = SU->getNode()->getFlaggedNode(); N; 442 N = N->getFlaggedNode()) 443 FlaggedNodes.push_back(N); 444 while (!FlaggedNodes.empty()) { 445 Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, 446 VRBaseMap, EM); 447 if (FlaggedNodes.back()->getHasDebugValue()) 448 if (SDDbgValue *sd = DAG->GetDbgInfo(FlaggedNodes.back())) { 449 Emitter.EmitDbgValue(FlaggedNodes.back(), VRBaseMap, sd); 450 delete sd; 451 } 452 FlaggedNodes.pop_back(); 453 } 454 Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, 455 VRBaseMap, EM); 456 if (SU->getNode()->getHasDebugValue()) 457 if (SDDbgValue *sd = DAG->GetDbgInfo(SU->getNode())) { 458 Emitter.EmitDbgValue(SU->getNode(), VRBaseMap, sd); 459 delete sd; 460 } 461 } 462 463 BB = Emitter.getBlock(); 464 InsertPos = Emitter.getInsertPos(); 465 return BB; 466} 467