RegAllocGreedy.cpp revision 7d6b6a05b549da70b4473f015c97954c2a422724
//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "regalloc"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "LiveRangeEdit.h"
#include "RegAllocBase.h"
#include "Spiller.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineLoopRanges.h"
"llvm/CodeGen/MachineRegisterInfo.h" 388459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.org#include "llvm/CodeGen/Passes.h" 398459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.org#include "llvm/CodeGen/RegAllocRegistry.h" 408459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.org#include "llvm/CodeGen/RegisterCoalescer.h" 418459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.org#include "llvm/Target/TargetOptions.h" 428459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.org#include "llvm/Support/Debug.h" 438459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.org#include "llvm/Support/ErrorHandling.h" 448459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.org#include "llvm/Support/raw_ostream.h" 458459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.org#include "llvm/Support/Timer.h" 46a09ef977b6799f01cd3fd64725fb3069da8be4bcvandebo@chromium.org 47a09ef977b6799f01cd3fd64725fb3069da8be4bcvandebo@chromium.org#include <queue> 48a09ef977b6799f01cd3fd64725fb3069da8be4bcvandebo@chromium.org 49a09ef977b6799f01cd3fd64725fb3069da8be4bcvandebo@chromium.orgusing namespace llvm; 50d90c141feb10c80581c16dbb57fcc97719a0c002vandebo@chromium.org 51d90c141feb10c80581c16dbb57fcc97719a0c002vandebo@chromium.orgSTATISTIC(NumGlobalSplits, "Number of split global live ranges"); 528459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.orgSTATISTIC(NumLocalSplits, "Number of split local live ranges"); 538459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.orgSTATISTIC(NumEvicted, "Number of interferences evicted"); 548459d4e5e32608ec6da3f2b81731aaeb7b038843vandebo@chromium.org 55static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", 56 createGreedyRegisterAllocator); 57 58namespace { 59class RAGreedy : public MachineFunctionPass, 60 public RegAllocBase, 61 private LiveRangeEdit::Delegate { 62 63 // context 64 MachineFunction *MF; 65 BitVector ReservedRegs; 66 67 // analyses 68 SlotIndexes *Indexes; 69 LiveStacks *LS; 70 MachineDominatorTree *DomTree; 71 MachineLoopInfo *Loops; 72 MachineLoopRanges *LoopRanges; 73 EdgeBundles *Bundles; 74 SpillPlacement *SpillPlacer; 75 76 // state 77 std::auto_ptr<Spiller> SpillerInstance; 78 std::priority_queue<std::pair<unsigned, unsigned> > Queue; 79 80 // Live ranges pass through a number of stages as we try to allocate them. 81 // Some of the stages may also create new live ranges: 82 // 83 // - Region splitting. 84 // - Per-block splitting. 85 // - Local splitting. 86 // - Spilling. 87 // 88 // Ranges produced by one of the stages skip the previous stages when they are 89 // dequeued. This improves performance because we can skip interference checks 90 // that are unlikely to give any results. It also guarantees that the live 91 // range splitting algorithm terminates, something that is otherwise hard to 92 // ensure. 93 enum LiveRangeStage { 94 RS_New, ///< Never seen before. 95 RS_First, ///< First time in the queue. 96 RS_Second, ///< Second time in the queue. 97 RS_Global, ///< Produced by global splitting. 98 RS_Local, ///< Produced by local splitting. 99 RS_Spill ///< Produced by spilling. 
  };

  IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage;

  LiveRangeStage getStage(const LiveInterval &VirtReg) const {
    return LiveRangeStage(LRStage[VirtReg.reg]);
  }

  template<typename Iterator>
  void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
    LRStage.resize(MRI->getNumVirtRegs());
    for (;Begin != End; ++Begin) {
      unsigned Reg = (*Begin)->reg;
      if (LRStage[Reg] == RS_New)
        LRStage[Reg] = NewStage;
    }
  }

  // splitting state.
  std::auto_ptr<SplitAnalysis> SA;
  std::auto_ptr<SplitEditor> SE;

  /// Cached per-block interference maps
  InterferenceCache IntfCache;

  /// All basic blocks where the current register has uses.
  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;

  /// Global live range splitting candidate info.
  struct GlobalSplitCandidate {
    unsigned PhysReg;
    BitVector LiveBundles;
    SmallVector<unsigned, 8> ActiveBlocks;

    void reset(unsigned Reg) {
      PhysReg = Reg;
      LiveBundles.clear();
      ActiveBlocks.clear();
    }
  };

  /// Candidate info for each PhysReg in AllocationOrder.
  /// This vector never shrinks, but grows to the size of the largest register
  /// class.
  SmallVector<GlobalSplitCandidate, 32> GlobalCand;

  /// For every instruction in SA->UseSlots, store the previous non-copy
  /// instruction.
  SmallVector<SlotIndex, 8> PrevSlot;

public:
  RAGreedy();

  /// Return the pass name.
  virtual const char* getPassName() const {
    return "Greedy Register Allocator";
  }

  /// RAGreedy analysis usage.
  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
  virtual void releaseMemory();
  virtual Spiller &spiller() { return *SpillerInstance; }
  virtual void enqueue(LiveInterval *LI);
  virtual LiveInterval *dequeue();
  virtual unsigned selectOrSplit(LiveInterval&,
                                 SmallVectorImpl<LiveInterval*>&);

  /// Perform register allocation.
  virtual bool runOnMachineFunction(MachineFunction &mf);

  static char ID;

private:
  void LRE_WillEraseInstruction(MachineInstr*);
  bool LRE_CanEraseVirtReg(unsigned);
  void LRE_WillShrinkVirtReg(unsigned);
  void LRE_DidCloneVirtReg(unsigned, unsigned);

  float calcSpillCost();
  bool addSplitConstraints(InterferenceCache::Cursor, float&);
  void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
  void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor);
  float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor);
  void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
                         SmallVectorImpl<LiveInterval*>&);
  void calcGapWeights(unsigned, SmallVectorImpl<float>&);
  SlotIndex getPrevMappedIndex(const MachineInstr*);
  void calcPrevSlots();
  unsigned nextSplitPoint(unsigned);
  bool canEvictInterference(LiveInterval&, unsigned, float&);

  unsigned tryAssign(LiveInterval&, AllocationOrder&,
                     SmallVectorImpl<LiveInterval*>&);
  unsigned tryEvict(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
  unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<LiveInterval*>&);
  unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<LiveInterval*>&);
  unsigned trySplit(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<LiveInterval*>&);
};
} // end anonymous namespace

char RAGreedy::ID = 0;

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = 0.98f;


FunctionPass* llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) {
  initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
  initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
  initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
  initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
  initializeLiveStacksPass(*PassRegistry::getPassRegistry());
  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
  initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
  initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry());
  initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
  initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
  initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<AliasAnalysis>();
  AU.addPreserved<AliasAnalysis>();
  AU.addRequired<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  if (StrongPHIElim)
    AU.addRequiredID(StrongPHIEliminationID);
  AU.addRequiredTransitive<RegisterCoalescer>();
  AU.addRequired<CalculateSpillWeights>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<MachineLoopRanges>();
  AU.addPreserved<MachineLoopRanges>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  MachineFunctionPass::getAnalysisUsage(AU);
}


//===----------------------------------------------------------------------===//
//                     LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) {
  // LRE itself will remove from SlotIndexes and parent basic block.
  VRM->RemoveMachineInstrFromMaps(MI);
}

bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
  if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
    unassign(LIS->getInterval(VirtReg), PhysReg);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
  unsigned PhysReg = VRM->getPhys(VirtReg);
  if (!PhysReg)
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  unassign(LI, PhysReg);
  enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. Ensure that the new register gets
  // the same stage as the parent.
  LRStage.grow(New);
  LRStage[New] = LRStage[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset(0);
  LRStage.clear();
  GlobalCand.clear();
  RegAllocBase::releaseMemory();
}

void RAGreedy::enqueue(LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const unsigned Size = LI->getSize();
  const unsigned Reg = LI->reg;
  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
         "Can only enqueue virtual registers");
  unsigned Prio;

  LRStage.grow(Reg);
  if (LRStage[Reg] == RS_New)
    LRStage[Reg] = RS_First;

  if (LRStage[Reg] == RS_Second)
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated. Long ranges are allocated last so
    // they are split against realistic interference.
    Prio = (1u << 31) - Size;
  else {
    // Everything else is allocated in long->short order. Long ranges that
    // don't fit should be spilled ASAP so they don't create interference.
    Prio = (1u << 31) + Size;

    // Boost ranges that have a physical register hint.
    if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg)))
      Prio |= (1u << 30);
  }

  Queue.push(std::make_pair(Prio, Reg));
}

LiveInterval *RAGreedy::dequeue() {
  if (Queue.empty())
    return 0;
  LiveInterval *LI = &LIS->getInterval(Queue.top().second);
  Queue.pop();
  return LI;
}


//===----------------------------------------------------------------------===//
//                            Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
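/// Returns the first register in the allocation order that VirtReg doesn't
/// interfere with. When that register carries a nonzero cost-per-use and is
/// not the hint, also try evicting interference from a cheaper register.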
unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
                             AllocationOrder &Order,
                             SmallVectorImpl<LiveInterval*> &NewVRegs) {
  Order.rewind();
  unsigned PhysReg;
  while ((PhysReg = Order.next()))
    if (!checkPhysRegInterference(VirtReg, PhysReg))
      break;
  if (!PhysReg || Order.isHint(PhysReg))
    return PhysReg;

  // PhysReg is available. Try to evict interference from a cheaper
  // alternative.
  unsigned Cost = TRI->getCostPerUse(PhysReg);

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
               << '\n');
  unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
  return CheapReg ? CheapReg : PhysReg;
}


//===----------------------------------------------------------------------===//
//                         Interference eviction
//===----------------------------------------------------------------------===//

/// canEvict - Return true if all interferences between VirtReg and PhysReg can
/// be evicted.
/// Return false if any interference is heavier than MaxWeight.
/// On return, set MaxWeight to the maximal spill weight of an interference.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                    float &MaxWeight) {
  float Weight = 0;
  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
    // If there are 10 or more interferences, chances are one is heavier.
    if (Q.collectInterferingVRegs(10, MaxWeight) >= 10)
      return false;

    // Check if any interfering live range is heavier than MaxWeight.
    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
      if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
        return false;
      if (Intf->weight >= MaxWeight)
        return false;
      Weight = std::max(Weight, Intf->weight);
    }
  }
  MaxWeight = Weight;
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param  VirtReg Currently unassigned virtual register.
/// @param  Order   Physregs to try.
/// @return         Physreg to assign VirtReg, or 0.
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                            AllocationOrder &Order,
                            SmallVectorImpl<LiveInterval*> &NewVRegs,
                            unsigned CostPerUseLimit) {
  NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);

  // Keep track of the lightest single interference seen so far.
  float BestWeight = VirtReg.weight;
  unsigned BestPhys = 0;

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
      continue;
    // The first use of a register in a function has cost 1.
    if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg))
      continue;

    float Weight = BestWeight;
    if (!canEvictInterference(VirtReg, PhysReg, Weight))
      continue;

    // This is an eviction candidate.
    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = "
                 << Weight << '\n');
    if (BestPhys && Weight >= BestWeight)
      continue;

    // Best so far.
    BestPhys = PhysReg;
    BestWeight = Weight;
    // Stop if the hint can be used.
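    // (The hint is preferred even if a later candidate in the order would
    // have lighter interference.)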
    if (Order.isHint(PhysReg))
      break;
  }

  if (!BestPhys)
    return 0;

  DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) << " interference\n");
  for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) {
    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
    assert(Q.seenAllInterferences() && "Didn't check all interferences.");
    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
      LiveInterval *Intf = Q.interferingVRegs()[i];
      unassign(*Intf, VRM->getPhys(Intf->reg));
      ++NumEvicted;
      NewVRegs.push_back(Intf);
    }
  }
  return BestPhys;
}


//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in Physreg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   float &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  float StaticCost = 0;
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
        BC.Entry = SpillPlacement::MustSpill, ++Ins;
      else if (Intf.first() < BI.FirstUse)
        BC.Entry = SpillPlacement::PrefSpill, ++Ins;
      else if (Intf.first() < (BI.LiveThrough ? BI.LastUse : BI.Kill))
        ++Ins;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
        BC.Exit = SpillPlacement::MustSpill, ++Ins;
      else if (Intf.last() > BI.LastUse)
        BC.Exit = SpillPlacement::PrefSpill, ++Ins;
      else if (Intf.last() > (BI.LiveThrough ? BI.FirstUse : BI.Def))
        ++Ins;
    }

    // Accumulate the total frequency of inserted spill code.
    if (Ins)
      StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}


/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
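/// These blocks carry no uses of the current register, so each one only needs
/// an entry and an exit constraint; interference-free blocks just get bundle
/// links. Both kinds are flushed to SpillPlacer in fixed-size groups.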
void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned i = 0; i != Blocks.size(); ++i) {
    unsigned Number = Blocks[i];
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
      SpillPlacer->addConstraints(Array);
      B = 0;
    }
  }

  ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
  SpillPlacer->addConstraints(Array);
  SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
}

void RAGreedy::growRegion(GlobalSplitCandidate &Cand,
                          InterferenceCache::Cursor Intf) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  for (;;) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    if (NewBundles.empty())
      break;
    // Find new through blocks in the periphery of PrefRegBundles.
    for (int i = 0, e = NewBundles.size(); i != e; ++i) {
      unsigned Bundle = NewBundles[i];
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
           I != E; ++I) {
        unsigned Block = *I;
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() > AddedTo) {
      ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo],
                             ActiveBlocks.size() - AddedTo);
      addThroughConstraints(Intf, Add);
      AddedTo = ActiveBlocks.size();
    }
    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  DEBUG(dbgs() << ", v=" << Visited);
}

/// calcSpillCost - Compute how expensive it would be to split the live range
/// in SA around all use blocks instead of forming bundle regions.
float RAGreedy::calcSpillCost() {
  float Cost = 0;
  const LiveInterval &LI = SA->getParent();
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
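    // If the live-in value dies in the block and a different value is live
    // out, the block needs both instructions: a load for the incoming value
    // and a store for the new one.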
    if (BI.LiveIn && BI.LiveOut) {
      SlotIndex Start, Stop;
      tie(Start, Stop) = Indexes->getMBBRange(Number);
      LiveInterval::const_iterator I = LI.find(Start);
      assert(I != LI.end() && "Expected live-in value");
      // Is there a different live-out value? If so, we need an extra spill
      // instruction.
      if (I->end < Stop)
        Cost += SpillPlacer->getBlockFrequency(Number);
    }
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                    InterferenceCache::Cursor Intf) {
  float GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
    unsigned Ins = 0;

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    if (Ins)
      GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
    unsigned Number = Cand.ActiveBlocks[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Intf.moveToBlock(Number);
      if (Intf.hasInterference())
        GlobalCost += 2*SpillPlacer->getBlockFrequency(Number);
      continue;
    }
    // live-in / stack-out or stack-in live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split VirtReg around the region determined by
/// LiveBundles. Make an effort to avoid interference from PhysReg.
///
/// The 'register' interval is going to contain as many uses as possible while
/// avoiding interference. The 'stack' interval is the complement constructed
/// by SplitEditor. It will contain the rest.
///
void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
                                 GlobalSplitCandidate &Cand,
                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
  const BitVector &LiveBundles = Cand.LiveBundles;

  DEBUG({
    dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI)
           << " with bundles";
    for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
      dbgs() << " EB#" << i;
    dbgs() << ".\n";
  });

  InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg);
  LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
  SE->reset(LREdit);

  // Create the main cross-block interval.
  const unsigned MainIntv = SE->openIntv();

  // First add all defs that are live out of a block.
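  // This is the first of two passes over the use blocks: it places the
  // boundaries for values that are live out of a block. The second pass
  // handles live-in values, and live-through blocks without uses are handled
  // last, in a pass over Cand.ActiveBlocks.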
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];

    // Create separate intervals for isolated blocks with multiple uses.
    if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) {
      DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
      SE->splitSingleBlock(BI);
      SE->selectIntv(MainIntv);
      continue;
    }

    // Should the register be live out?
    if (!BI.LiveOut || !RegOut)
      continue;

    SlotIndex Start, Stop;
    tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
    Intf.moveToBlock(BI.MBB->getNumber());
    DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#"
                 << Bundles->getBundle(BI.MBB->getNumber(), 1)
                 << " [" << Start << ';'
                 << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop
                 << ") intf [" << Intf.first() << ';' << Intf.last() << ')');

    // The interference interval should either be invalid or overlap MBB.
    assert((!Intf.hasInterference() || Intf.first() < Stop)
           && "Bad interference");
    assert((!Intf.hasInterference() || Intf.last() > Start)
           && "Bad interference");

    // Check interference leaving the block.
    if (!Intf.hasInterference()) {
      // Block is interference-free.
      DEBUG(dbgs() << ", no interference");
      if (!BI.LiveThrough) {
        DEBUG(dbgs() << ", not live-through.\n");
        SE->useIntv(SE->enterIntvBefore(BI.Def), Stop);
        continue;
      }
      if (!RegIn) {
        // Block is live-through, but entry bundle is on the stack.
        // Reload just before the first use.
        DEBUG(dbgs() << ", not live-in, enter before first use.\n");
        SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop);
        continue;
      }
      DEBUG(dbgs() << ", live-through.\n");
      continue;
    }

    // Block has interference.
    DEBUG(dbgs() << ", interference to " << Intf.last());

    if (!BI.LiveThrough && Intf.last() <= BI.Def) {
      // The interference doesn't reach the outgoing segment.
      DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n');
      SE->useIntv(BI.Def, Stop);
      continue;
    }

    SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber());
    if (Intf.last().getBoundaryIndex() < BI.LastUse) {
      // There are interference-free uses at the end of the block.
      // Find the first use that can get the live-out register.
      SmallVectorImpl<SlotIndex>::const_iterator UI =
        std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
                         Intf.last().getBoundaryIndex());
      assert(UI != SA->UseSlots.end() && "Couldn't find last use");
      SlotIndex Use = *UI;
      assert(Use <= BI.LastUse && "Couldn't find last use");
      // Only attempt a split before the last split point.
      if (Use.getBaseIndex() <= LastSplitPoint) {
        DEBUG(dbgs() << ", free use at " << Use << ".\n");
        SlotIndex SegStart = SE->enterIntvBefore(Use);
        assert(SegStart >= Intf.last() && "Couldn't avoid interference");
        assert(SegStart < LastSplitPoint && "Impossible split point");
        SE->useIntv(SegStart, Stop);
        continue;
      }
    }

    // Interference is after the last use.
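    // Enter the register interval at the end of the block, so the inserted
    // copy lands after the interference.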
    DEBUG(dbgs() << " after last use.\n");
    SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB);
    assert(SegStart >= Intf.last() && "Couldn't avoid interference");
  }

  // Now all defs leading to live bundles are handled, do everything else.
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];

    // Is the register live-in?
    if (!BI.LiveIn || !RegIn)
      continue;

    // We have an incoming register. Check for interference.
    SlotIndex Start, Stop;
    tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
    Intf.moveToBlock(BI.MBB->getNumber());
    DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0)
                 << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';'
                 << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop
                 << ')');

    // Check interference entering the block.
    if (!Intf.hasInterference()) {
      // Block is interference-free.
      DEBUG(dbgs() << ", no interference");
      if (!BI.LiveThrough) {
        DEBUG(dbgs() << ", killed in block.\n");
        SE->useIntv(Start, SE->leaveIntvAfter(BI.Kill));
        continue;
      }
      if (!RegOut) {
        SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber());
        // Block is live-through, but exit bundle is on the stack.
        // Spill immediately after the last use.
        if (BI.LastUse < LastSplitPoint) {
          DEBUG(dbgs() << ", uses, stack-out.\n");
          SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse));
          continue;
        }
        // The last use is after the last split point, it is probably an
        // indirect jump.
        DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point "
                     << LastSplitPoint << ", stack-out.\n");
        SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint);
        SE->useIntv(Start, SegEnd);
        // Run a double interval from the split to the last use.
        // This makes it possible to spill the complement without affecting the
        // indirect branch.
        SE->overlapIntv(SegEnd, BI.LastUse);
        continue;
      }
      // Register is live-through.
      DEBUG(dbgs() << ", uses, live-through.\n");
      SE->useIntv(Start, Stop);
      continue;
    }

    // Block has interference.
    DEBUG(dbgs() << ", interference from " << Intf.first());

    if (!BI.LiveThrough && Intf.first() >= BI.Kill) {
      // The interference doesn't reach the incoming segment.
      DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n');
      SE->useIntv(Start, BI.Kill);
      continue;
    }

    if (Intf.first().getBaseIndex() > BI.FirstUse) {
      // There are interference-free uses at the beginning of the block.
      // Find the last use that can get the register.
      SmallVectorImpl<SlotIndex>::const_iterator UI =
        std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
                         Intf.first().getBaseIndex());
      assert(UI != SA->UseSlots.begin() && "Couldn't find first use");
      SlotIndex Use = (--UI)->getBoundaryIndex();
      DEBUG(dbgs() << ", free use at " << *UI << ".\n");
      SlotIndex SegEnd = SE->leaveIntvAfter(Use);
      assert(SegEnd <= Intf.first() && "Couldn't avoid interference");
      SE->useIntv(Start, SegEnd);
      continue;
    }

    // Interference is before the first use.
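    // Leave the register interval at the top of the block; the switch to the
    // complement happens before the interference starts.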
    DEBUG(dbgs() << " before first use.\n");
    SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB);
    assert(SegEnd <= Intf.first() && "Couldn't avoid interference");
  }

  // Handle live-through blocks.
  for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
    unsigned Number = Cand.ActiveBlocks[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
    DEBUG(dbgs() << "Live through BB#" << Number << '\n');
    if (RegIn && RegOut) {
      Intf.moveToBlock(Number);
      if (!Intf.hasInterference()) {
        SE->useIntv(Indexes->getMBBStartIdx(Number),
                    Indexes->getMBBEndIdx(Number));
        continue;
      }
    }
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    if (RegIn)
      SE->leaveIntvAtTop(*MBB);
    if (RegOut)
      SE->enterIntvAtEnd(*MBB);
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  LRStage.resize(MRI->getNumVirtRegs());
  unsigned OrigBlocks = SA->getNumThroughBlocks() + SA->getUseBlocks().size();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
    unsigned Reg = LREdit.get(i)->reg;

    // Ignore old intervals from DCE.
    if (LRStage[Reg] != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[i] == 0) {
      LRStage[Reg] = RS_Global;
      continue;
    }

    // Main interval. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[i] == MainIntv) {
      if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) {
        DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                     << " blocks as original.\n");
        // Don't allow repeated splitting as a safe guard against looping.
        LRStage[Reg] = RS_Global;
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals
    // created for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                  SmallVectorImpl<LiveInterval*> &NewVRegs) {
  float BestCost = Hysteresis * calcSpillCost();
  DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
  const unsigned NoCand = ~0u;
  unsigned BestCand = NoCand;

  Order.rewind();
  for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) {
    if (GlobalCand.size() <= Cand)
      GlobalCand.resize(Cand+1);
    GlobalCand[Cand].reset(PhysReg);

    SpillPlacer->prepare(GlobalCand[Cand].LiveBundles);
    float Cost;
    InterferenceCache::Cursor Intf(IntfCache, PhysReg);
    if (!addSplitConstraints(Intf, Cost)) {
      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
    if (Cost >= BestCost) {
      DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    growRegion(GlobalCand[Cand], Intf);

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!GlobalCand[Cand].LiveBundles.any()) {
      DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf);
    DEBUG({
      dbgs() << ", total = " << Cost << " with bundles";
      for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0;
           i = GlobalCand[Cand].LiveBundles.find_next(i))
        dbgs() << " EB#" << i;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = Cand;
      BestCost = Hysteresis * Cost; // Prevent rounding effects.
    }
  }

  if (BestCand == NoCand)
    return 0;

  splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs);
  return 0;
}


//===----------------------------------------------------------------------===//
//                             Local Splitting
//===----------------------------------------------------------------------===//


/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
///
void RAGreedy::calcGapWeights(unsigned PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
  const unsigned NumGaps = Uses.size()-1;

  // Start and end points for the interference check.
  SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse;
  SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
    if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
          .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstUse to LastUse,
    // so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight;
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// getPrevMappedIndex - Return the slot index of the last non-copy instruction
/// before MI that has a slot index. If MI is the first mapped instruction in
/// its block, return the block start index instead.
///
SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) {
  assert(MI && "Missing MachineInstr");
  const MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::const_iterator B = MBB->begin(), I = MI;
  while (I != B)
    if (!(--I)->isDebugValue() && !I->isCopy())
      return Indexes->getInstructionIndex(I);
  return Indexes->getMBBStartIdx(MBB);
}

/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous
/// real non-copy instruction for each instruction in SA->UseSlots.
///
void RAGreedy::calcPrevSlots() {
  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
  PrevSlot.clear();
  PrevSlot.reserve(Uses.size());
  for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
    const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]);
    PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex());
  }
}

/// nextSplitPoint - Find the next index into SA->UseSlots > i such that it may
/// be beneficial to split before UseSlots[i].
///
/// 0 is always a valid split point
unsigned RAGreedy::nextSplitPoint(unsigned i) {
  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
  const unsigned Size = Uses.size();
  assert(i != Size && "No split points after the end");
  // Allow split before i when Uses[i] is not adjacent to the previous use.
  while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex())
    ;
  return i;
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstUse to LastUse. We should make
  // sure that we don't do anything illegal to such an interval, though.
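  //
  // With N entries in UseSlots there are N-1 gaps between consecutive uses.
  // The search below slides a window Uses[SplitBefore..SplitAfter] over the
  // uses and keeps that run in a register; everything outside the window is
  // left to the complement interval.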

  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
      dbgs() << ' ' << SA->UseSlots[i];
    dbgs() << '\n';
  });

  // For every use, find the previous mapped non-copy instruction.
  // We use this to detect valid split points, and to estimate new interval
  // sizes.
  calcPrevSlots();

  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
  SmallVector<float, 8> GapWeight;

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
    calcGapWeights(PhysReg, GapWeight);

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];
    for (unsigned i = 1; i != SplitAfter; ++i)
      MaxGap = std::max(MaxGap, GapWeight[i]);

    for (;;) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
                   << Uses[SplitBefore] << '-' << Uses[SplitAfter]
                   << " i=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;
      if (MaxGap < HUGE_VALF) {
        // Estimate the new spill weight.
        //
        // Each instruction reads and writes the register, except the first
        // instr doesn't read when !FirstLive, and the last instr doesn't write
        // when !LastLive.
        //
        // We will be inserting copies before and after, so the total number of
        // reads and writes is 2 * EstUses.
        //
        const unsigned EstUses = 2*(SplitAfter - SplitBefore) +
                                 2*(LiveBefore + LiveAfter);

        // Try to guess the size of the new interval. This should be trivial,
        // but the slot index of an inserted copy can be a lot smaller than the
        // instruction it is inserted before if there are many dead indexes
        // between them.
        //
        // We measure the distance from the instruction before SplitBefore to
        // get a conservative estimate.
        //
        // The final distance can still be different if inserting copies
        // triggers a slot index renumbering.
        //
        const float EstWeight = normalizeSpillWeight(blockFreq * EstUses,
                              PrevSlot[SplitBefore].distance(Uses[SplitAfter]));
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
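        // The Hysteresis factor keeps near-equal estimates from flipping this
        // decision back and forth between rounds.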
        DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        SplitBefore = nextSplitPoint(SplitBefore);
        if (SplitBefore < SplitAfter) {
          DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
              MaxGap = std::max(MaxGap, GapWeight[i]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        DEBUG(dbgs() << " end\n");
        break;
      }

      DEBUG(dbgs() << " extend\n");
      for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1;
           SplitAfter != e; ++SplitAfter)
        MaxGap = std::max(MaxGap, GapWeight[SplitAfter]);
      continue;
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
               << '-' << Uses[BestAfter] << ", " << BestDiff
               << ", " << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SE->finish();
  setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local);
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<LiveInterval*> &NewVRegs) {
  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    return tryLocalSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);

  // Don't iterate global splitting.
  // Move straight to spilling if this range was produced by a global split.
  if (getStage(VirtReg) >= RS_Global)
    return 0;

  SA->analyze(&VirtReg);

  // FIXME: SplitAnalysis may repair broken live ranges coming from the
  // coalescer. That may cause the range to become allocatable which means that
  // tryRegionSplit won't be making progress. This check should be replaced
  // with an assertion when the coalescer is fixed.
  if (SA->didRepairRange()) {
    // VirtReg has changed, so all cached queries are invalid.
    Order.rewind();
    while (unsigned PhysReg = Order.next())
      query(VirtReg, PhysReg).clear();
    if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
      return PhysReg;
  }

  // First try to split around a region spanning multiple blocks.
  unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
  if (PhysReg || !NewVRegs.empty())
    return PhysReg;

  // Then isolate blocks with multiple uses.
  SplitAnalysis::BlockPtrSet Blocks;
  if (SA->getMultiUseBlocks(Blocks)) {
    LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
    SE->reset(LREdit);
    SE->splitSingleBlocks(Blocks);
    setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global);
    if (VerifyEnabled)
      MF->verify(this, "After splitting live range around basic blocks");
  }

  // Don't assign any physregs.
  return 0;
}


//===----------------------------------------------------------------------===//
//                            Main Entry Point
//===----------------------------------------------------------------------===//

unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
  // First try assigning a free register.
  AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs);
  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
    return PhysReg;

  if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
    return PhysReg;

  assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  LiveRangeStage Stage = getStage(VirtReg);
  if (Stage == RS_First) {
    LRStage[VirtReg.reg] = RS_Second;
    DEBUG(dbgs() << "wait for second round\n");
    NewVRegs.push_back(&VirtReg);
    return 0;
  }

  assert(Stage < RS_Spill && "Cannot allocate after spilling");

  // Try splitting VirtReg or interferences.
  unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
  if (PhysReg || !NewVRegs.empty())
    return PhysReg;

  // Finally spill VirtReg itself.
  NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
  LiveRangeEdit LRE(VirtReg, NewVRegs, this);
  spiller().spill(LRE);
  setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill);

  if (VerifyEnabled)
    MF->verify(this, "After spilling");

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return 0;
}

bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
               << "********** Function: "
               << ((Value*)mf.getFunction())->getName() << '\n');

  MF = &mf;
  if (VerifyEnabled)
    MF->verify(this, "Before greedy register allocator");

  RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
  Indexes = &getAnalysis<SlotIndexes>();
  DomTree = &getAnalysis<MachineDominatorTree>();
  ReservedRegs = TRI->getReservedRegs(*MF);
  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
  Loops = &getAnalysis<MachineLoopInfo>();
  LoopRanges = &getAnalysis<MachineLoopRanges>();
  Bundles = &getAnalysis<EdgeBundles>();
  SpillPlacer = &getAnalysis<SpillPlacement>();

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
  LRStage.clear();
  LRStage.resize(MRI->getNumVirtRegs());
  IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);

  allocatePhysRegs();
  addMBBLiveIns(MF);
  LIS->addKillFlags();

  // Run rewriter
  {
    NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);
    VRM->rewrite(Indexes);
  }

  // Write out new DBG_VALUE instructions.
  getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);

  // The pass output is in VirtRegMap. Release all the transient data.
  releaseMemory();

  return true;
}