ScheduleDAGSDNodes.cpp revision c589e03865bb31da70e0037d5c32fdaaa5f79f24
1//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements the ScheduleDAG class, which is a base class used by 11// scheduling implementation classes. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "pre-RA-sched" 16#include "ScheduleDAGSDNodes.h" 17#include "InstrEmitter.h" 18#include "llvm/CodeGen/SelectionDAG.h" 19#include "llvm/Target/TargetMachine.h" 20#include "llvm/Target/TargetInstrInfo.h" 21#include "llvm/Target/TargetRegisterInfo.h" 22#include "llvm/Target/TargetSubtarget.h" 23#include "llvm/ADT/DenseMap.h" 24#include "llvm/ADT/SmallPtrSet.h" 25#include "llvm/ADT/SmallVector.h" 26#include "llvm/ADT/Statistic.h" 27#include "llvm/Support/CommandLine.h" 28#include "llvm/Support/Debug.h" 29#include "llvm/Support/raw_ostream.h" 30using namespace llvm; 31 32STATISTIC(LoadsClustered, "Number of loads clustered together"); 33 34static cl::opt<bool> 35ClusterLoads("cluster-loads", cl::Hidden, 36 cl::desc("Schedule nearby loads together and in order")); 37 38ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) 39 : ScheduleDAG(mf) { 40} 41 42/// Run - perform scheduling. 43/// 44void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, 45 MachineBasicBlock::iterator insertPos) { 46 DAG = dag; 47 ScheduleDAG::Run(bb, insertPos); 48} 49 50SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { 51 SUnit *SU = NewSUnit(Old->getNode()); 52 SU->OrigNode = Old->OrigNode; 53 SU->Latency = Old->Latency; 54 SU->isTwoAddress = Old->isTwoAddress; 55 SU->isCommutable = Old->isCommutable; 56 SU->hasPhysRegDefs = Old->hasPhysRegDefs; 57 SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; 58 Old->isCloned = true; 59 return SU; 60} 61 62/// CheckForPhysRegDependency - Check if the dependency between def and use of 63/// a specified operand is a physical register dependency. If so, returns the 64/// register and the cost of copying the register. 65static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, 66 const TargetRegisterInfo *TRI, 67 const TargetInstrInfo *TII, 68 unsigned &PhysReg, int &Cost) { 69 if (Op != 2 || User->getOpcode() != ISD::CopyToReg) 70 return; 71 72 unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); 73 if (TargetRegisterInfo::isVirtualRegister(Reg)) 74 return; 75 76 unsigned ResNo = User->getOperand(2).getResNo(); 77 if (Def->isMachineOpcode()) { 78 const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); 79 if (ResNo >= II.getNumDefs() && 80 II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { 81 PhysReg = Reg; 82 const TargetRegisterClass *RC = 83 TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); 84 Cost = RC->getCopyCost(); 85 } 86 } 87} 88 89static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, 90 SelectionDAG *DAG) { 91 SmallVector<EVT, 4> VTs; 92 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) 93 VTs.push_back(N->getValueType(i)); 94 if (AddFlag) 95 VTs.push_back(MVT::Flag); 96 SmallVector<SDValue, 4> Ops; 97 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 98 Ops.push_back(N->getOperand(i)); 99 if (Flag.getNode()) 100 Ops.push_back(Flag); 101 SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); 102 DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); 103} 104 105/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them. 106/// This function finds loads of the same base and different offsets. If the 107/// offsets are not far apart (target specific), it add MVT::Flag inputs and 108/// outputs to ensure they are scheduled together and in order. This 109/// optimization may benefit some targets by improving cache locality. 110void ScheduleDAGSDNodes::ClusterNeighboringLoads() { 111 SmallPtrSet<SDNode*, 16> Visited; 112 SmallVector<int64_t, 4> Offsets; 113 DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode. 114 for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), 115 E = DAG->allnodes_end(); NI != E; ++NI) { 116 SDNode *Node = &*NI; 117 if (!Node || !Node->isMachineOpcode()) 118 continue; 119 120 unsigned Opc = Node->getMachineOpcode(); 121 const TargetInstrDesc &TID = TII->get(Opc); 122 if (!TID.mayLoad()) 123 continue; 124 125 SDNode *Chain = 0; 126 unsigned NumOps = Node->getNumOperands(); 127 if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) 128 Chain = Node->getOperand(NumOps-1).getNode(); 129 if (!Chain) 130 continue; 131 132 // Look for other loads of the same chain. Find loads that are loading from 133 // the same base pointer and different offsets. 134 Visited.clear(); 135 Offsets.clear(); 136 O2SMap.clear(); 137 bool Cluster = false; 138 SDNode *Base = Node; 139 int64_t BaseOffset; 140 for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); 141 I != E; ++I) { 142 SDNode *User = *I; 143 if (User == Node || !Visited.insert(User)) 144 continue; 145 int64_t Offset1, Offset2; 146 if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || 147 Offset1 == Offset2) 148 // FIXME: Should be ok if they addresses are identical. But earlier 149 // optimizations really should have eliminated one of the loads. 150 continue; 151 if (O2SMap.insert(std::make_pair(Offset1, Base)).second) 152 Offsets.push_back(Offset1); 153 O2SMap.insert(std::make_pair(Offset2, User)); 154 Offsets.push_back(Offset2); 155 if (Offset2 < Offset1) { 156 Base = User; 157 BaseOffset = Offset2; 158 } else { 159 BaseOffset = Offset1; 160 } 161 Cluster = true; 162 } 163 164 if (!Cluster) 165 continue; 166 167 // Sort them in increasing order. 168 std::sort(Offsets.begin(), Offsets.end()); 169 170 // Check if the loads are close enough. 171 SmallVector<SDNode*, 4> Loads; 172 unsigned NumLoads = 0; 173 int64_t BaseOff = Offsets[0]; 174 SDNode *BaseLoad = O2SMap[BaseOff]; 175 Loads.push_back(BaseLoad); 176 for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { 177 int64_t Offset = Offsets[i]; 178 SDNode *Load = O2SMap[Offset]; 179 if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, 180 NumLoads)) 181 break; // Stop right here. Ignore loads that are further away. 182 Loads.push_back(Load); 183 ++NumLoads; 184 } 185 186 if (NumLoads == 0) 187 continue; 188 189 // Cluster loads by adding MVT::Flag outputs and inputs. This also 190 // ensure they are scheduled in order of increasing addresses. 191 SDNode *Lead = Loads[0]; 192 AddFlags(Lead, SDValue(0,0), true, DAG); 193 SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1); 194 for (unsigned i = 1, e = Loads.size(); i != e; ++i) { 195 bool OutFlag = i < e-1; 196 SDNode *Load = Loads[i]; 197 AddFlags(Load, InFlag, OutFlag, DAG); 198 if (OutFlag) 199 InFlag = SDValue(Load, Load->getNumValues()-1); 200 ++LoadsClustered; 201 } 202 } 203} 204 205void ScheduleDAGSDNodes::BuildSchedUnits() { 206 // During scheduling, the NodeId field of SDNode is used to map SDNodes 207 // to their associated SUnits by holding SUnits table indices. A value 208 // of -1 means the SDNode does not yet have an associated SUnit. 209 unsigned NumNodes = 0; 210 for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), 211 E = DAG->allnodes_end(); NI != E; ++NI) { 212 NI->setNodeId(-1); 213 ++NumNodes; 214 } 215 216 // Reserve entries in the vector for each of the SUnits we are creating. This 217 // ensure that reallocation of the vector won't happen, so SUnit*'s won't get 218 // invalidated. 219 // FIXME: Multiply by 2 because we may clone nodes during scheduling. 220 // This is a temporary workaround. 221 SUnits.reserve(NumNodes * 2); 222 223 // Check to see if the scheduler cares about latencies. 224 bool UnitLatencies = ForceUnitLatencies(); 225 226 for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), 227 E = DAG->allnodes_end(); NI != E; ++NI) { 228 if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. 229 continue; 230 231 // If this node has already been processed, stop now. 232 if (NI->getNodeId() != -1) continue; 233 234 SUnit *NodeSUnit = NewSUnit(NI); 235 236 // See if anything is flagged to this node, if so, add them to flagged 237 // nodes. Nodes can have at most one flag input and one flag output. Flags 238 // are required to be the last operand and result of a node. 239 240 // Scan up to find flagged preds. 241 SDNode *N = NI; 242 while (N->getNumOperands() && 243 N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { 244 N = N->getOperand(N->getNumOperands()-1).getNode(); 245 assert(N->getNodeId() == -1 && "Node already inserted!"); 246 N->setNodeId(NodeSUnit->NodeNum); 247 } 248 249 // Scan down to find any flagged succs. 250 N = NI; 251 while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { 252 SDValue FlagVal(N, N->getNumValues()-1); 253 254 // There are either zero or one users of the Flag result. 255 bool HasFlagUse = false; 256 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); 257 UI != E; ++UI) 258 if (FlagVal.isOperandOf(*UI)) { 259 HasFlagUse = true; 260 assert(N->getNodeId() == -1 && "Node already inserted!"); 261 N->setNodeId(NodeSUnit->NodeNum); 262 N = *UI; 263 break; 264 } 265 if (!HasFlagUse) break; 266 } 267 268 // If there are flag operands involved, N is now the bottom-most node 269 // of the sequence of nodes that are flagged together. 270 // Update the SUnit. 271 NodeSUnit->setNode(N); 272 assert(N->getNodeId() == -1 && "Node already inserted!"); 273 N->setNodeId(NodeSUnit->NodeNum); 274 275 // Assign the Latency field of NodeSUnit using target-provided information. 276 if (UnitLatencies) 277 NodeSUnit->Latency = 1; 278 else 279 ComputeLatency(NodeSUnit); 280 } 281} 282 283void ScheduleDAGSDNodes::AddSchedEdges() { 284 const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); 285 286 // Check to see if the scheduler cares about latencies. 287 bool UnitLatencies = ForceUnitLatencies(); 288 289 // Pass 2: add the preds, succs, etc. 290 for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { 291 SUnit *SU = &SUnits[su]; 292 SDNode *MainNode = SU->getNode(); 293 294 if (MainNode->isMachineOpcode()) { 295 unsigned Opc = MainNode->getMachineOpcode(); 296 const TargetInstrDesc &TID = TII->get(Opc); 297 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 298 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 299 SU->isTwoAddress = true; 300 break; 301 } 302 } 303 if (TID.isCommutable()) 304 SU->isCommutable = true; 305 } 306 307 // Find all predecessors and successors of the group. 308 for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) { 309 if (N->isMachineOpcode() && 310 TII->get(N->getMachineOpcode()).getImplicitDefs()) { 311 SU->hasPhysRegClobbers = true; 312 unsigned NumUsed = InstrEmitter::CountResults(N); 313 while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) 314 --NumUsed; // Skip over unused values at the end. 315 if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) 316 SU->hasPhysRegDefs = true; 317 } 318 319 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 320 SDNode *OpN = N->getOperand(i).getNode(); 321 if (isPassiveNode(OpN)) continue; // Not scheduled. 322 SUnit *OpSU = &SUnits[OpN->getNodeId()]; 323 assert(OpSU && "Node has no SUnit!"); 324 if (OpSU == SU) continue; // In the same group. 325 326 EVT OpVT = N->getOperand(i).getValueType(); 327 assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); 328 bool isChain = OpVT == MVT::Other; 329 330 unsigned PhysReg = 0; 331 int Cost = 1; 332 // Determine if this is a physical register dependency. 333 CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); 334 assert((PhysReg == 0 || !isChain) && 335 "Chain dependence via physreg data?"); 336 // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler 337 // emits a copy from the physical register to a virtual register unless 338 // it requires a cross class copy (cost < 0). That means we are only 339 // treating "expensive to copy" register dependency as physical register 340 // dependency. This may change in the future though. 341 if (Cost >= 0) 342 PhysReg = 0; 343 344 const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, 345 OpSU->Latency, PhysReg); 346 if (!isChain && !UnitLatencies) { 347 ComputeOperandLatency(OpSU, SU, (SDep &)dep); 348 ST.adjustSchedDependency(OpSU, SU, (SDep &)dep); 349 } 350 351 SU->addPred(dep); 352 } 353 } 354 } 355} 356 357/// BuildSchedGraph - Build the SUnit graph from the selection dag that we 358/// are input. This SUnit graph is similar to the SelectionDAG, but 359/// excludes nodes that aren't interesting to scheduling, and represents 360/// flagged together nodes with a single SUnit. 361void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { 362 // Cluster loads from "near" addresses into combined SUnits. 363 if (ClusterLoads) 364 ClusterNeighboringLoads(); 365 // Populate the SUnits array. 366 BuildSchedUnits(); 367 // Compute all the scheduling dependencies between nodes. 368 AddSchedEdges(); 369} 370 371void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { 372 const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); 373 374 // Compute the latency for the node. We use the sum of the latencies for 375 // all nodes flagged together into this SUnit. 376 SU->Latency = 0; 377 for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) 378 if (N->isMachineOpcode()) { 379 SU->Latency += InstrItins. 380 getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); 381 } 382} 383 384void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { 385 if (!SU->getNode()) { 386 dbgs() << "PHYS REG COPY\n"; 387 return; 388 } 389 390 SU->getNode()->dump(DAG); 391 dbgs() << "\n"; 392 SmallVector<SDNode *, 4> FlaggedNodes; 393 for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) 394 FlaggedNodes.push_back(N); 395 while (!FlaggedNodes.empty()) { 396 dbgs() << " "; 397 FlaggedNodes.back()->dump(DAG); 398 dbgs() << "\n"; 399 FlaggedNodes.pop_back(); 400 } 401} 402 403/// EmitSchedule - Emit the machine code in scheduled order. 404MachineBasicBlock *ScheduleDAGSDNodes:: 405EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { 406 InstrEmitter Emitter(BB, InsertPos); 407 DenseMap<SDValue, unsigned> VRBaseMap; 408 DenseMap<SUnit*, unsigned> CopyVRBaseMap; 409 for (unsigned i = 0, e = Sequence.size(); i != e; i++) { 410 SUnit *SU = Sequence[i]; 411 if (!SU) { 412 // Null SUnit* is a noop. 413 EmitNoop(); 414 continue; 415 } 416 417 // For pre-regalloc scheduling, create instructions corresponding to the 418 // SDNode and any flagged SDNodes and append them to the block. 419 if (!SU->getNode()) { 420 // Emit a copy. 421 EmitPhysRegCopy(SU, CopyVRBaseMap); 422 continue; 423 } 424 425 SmallVector<SDNode *, 4> FlaggedNodes; 426 for (SDNode *N = SU->getNode()->getFlaggedNode(); N; 427 N = N->getFlaggedNode()) 428 FlaggedNodes.push_back(N); 429 while (!FlaggedNodes.empty()) { 430 Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, 431 VRBaseMap, EM); 432 FlaggedNodes.pop_back(); 433 } 434 Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, 435 VRBaseMap, EM); 436 } 437 438 BB = Emitter.getBlock(); 439 InsertPos = Emitter.getInsertPos(); 440 return BB; 441} 442