// RegAllocGreedy.cpp — snapshot at revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
1//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the RAGreedy function pass for register allocation in 11// optimized builds. 12// 13//===----------------------------------------------------------------------===// 14 15#include "llvm/CodeGen/Passes.h" 16#include "AllocationOrder.h" 17#include "InterferenceCache.h" 18#include "LiveDebugVariables.h" 19#include "RegAllocBase.h" 20#include "SpillPlacement.h" 21#include "Spiller.h" 22#include "SplitKit.h" 23#include "llvm/ADT/Statistic.h" 24#include "llvm/Analysis/AliasAnalysis.h" 25#include "llvm/CodeGen/CalcSpillWeights.h" 26#include "llvm/CodeGen/EdgeBundles.h" 27#include "llvm/CodeGen/LiveIntervalAnalysis.h" 28#include "llvm/CodeGen/LiveRangeEdit.h" 29#include "llvm/CodeGen/LiveRegMatrix.h" 30#include "llvm/CodeGen/LiveStackAnalysis.h" 31#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 32#include "llvm/CodeGen/MachineDominators.h" 33#include "llvm/CodeGen/MachineFunctionPass.h" 34#include "llvm/CodeGen/MachineLoopInfo.h" 35#include "llvm/CodeGen/MachineRegisterInfo.h" 36#include "llvm/CodeGen/RegAllocRegistry.h" 37#include "llvm/CodeGen/RegisterClassInfo.h" 38#include "llvm/CodeGen/VirtRegMap.h" 39#include "llvm/IR/LLVMContext.h" 40#include "llvm/PassAnalysisSupport.h" 41#include "llvm/Support/BranchProbability.h" 42#include "llvm/Support/CommandLine.h" 43#include "llvm/Support/Debug.h" 44#include "llvm/Support/ErrorHandling.h" 45#include "llvm/Support/Timer.h" 46#include "llvm/Support/raw_ostream.h" 47#include <queue> 48 49using namespace llvm; 50 51#define DEBUG_TYPE "regalloc" 52 53STATISTIC(NumGlobalSplits, "Number of split global live ranges"); 54STATISTIC(NumLocalSplits, "Number of split local live 
ranges"); 55STATISTIC(NumEvicted, "Number of interferences evicted"); 56 57static cl::opt<SplitEditor::ComplementSpillMode> 58SplitSpillMode("split-spill-mode", cl::Hidden, 59 cl::desc("Spill mode for splitting live ranges"), 60 cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), 61 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), 62 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), 63 clEnumValEnd), 64 cl::init(SplitEditor::SM_Partition)); 65 66static cl::opt<unsigned> 67LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, 68 cl::desc("Last chance recoloring max depth"), 69 cl::init(5)); 70 71static cl::opt<unsigned> LastChanceRecoloringMaxInterference( 72 "lcr-max-interf", cl::Hidden, 73 cl::desc("Last chance recoloring maximum number of considered" 74 " interference at a time"), 75 cl::init(8)); 76 77static cl::opt<bool> 78ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, 79 cl::desc("Exhaustive Search for registers bypassing the depth " 80 "and interference cutoffs of last chance recoloring")); 81 82// FIXME: Find a good default for this flag and remove the flag. 83static cl::opt<unsigned> 84CSRFirstTimeCost("regalloc-csr-first-time-cost", 85 cl::desc("Cost for first time use of callee-saved register."), 86 cl::init(0), cl::Hidden); 87 88static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", 89 createGreedyRegisterAllocator); 90 91namespace { 92class RAGreedy : public MachineFunctionPass, 93 public RegAllocBase, 94 private LiveRangeEdit::Delegate { 95 // Convenient shortcuts. 96 typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue; 97 typedef SmallPtrSet<LiveInterval *, 4> SmallLISet; 98 typedef SmallSet<unsigned, 16> SmallVirtRegSet; 99 100 // context 101 MachineFunction *MF; 102 103 // Shortcuts to some useful interface. 
104 const TargetInstrInfo *TII; 105 const TargetRegisterInfo *TRI; 106 RegisterClassInfo RCI; 107 108 // analyses 109 SlotIndexes *Indexes; 110 MachineBlockFrequencyInfo *MBFI; 111 MachineDominatorTree *DomTree; 112 MachineLoopInfo *Loops; 113 EdgeBundles *Bundles; 114 SpillPlacement *SpillPlacer; 115 LiveDebugVariables *DebugVars; 116 117 // state 118 std::unique_ptr<Spiller> SpillerInstance; 119 PQueue Queue; 120 unsigned NextCascade; 121 122 // Live ranges pass through a number of stages as we try to allocate them. 123 // Some of the stages may also create new live ranges: 124 // 125 // - Region splitting. 126 // - Per-block splitting. 127 // - Local splitting. 128 // - Spilling. 129 // 130 // Ranges produced by one of the stages skip the previous stages when they are 131 // dequeued. This improves performance because we can skip interference checks 132 // that are unlikely to give any results. It also guarantees that the live 133 // range splitting algorithm terminates, something that is otherwise hard to 134 // ensure. 135 enum LiveRangeStage { 136 /// Newly created live range that has never been queued. 137 RS_New, 138 139 /// Only attempt assignment and eviction. Then requeue as RS_Split. 140 RS_Assign, 141 142 /// Attempt live range splitting if assignment is impossible. 143 RS_Split, 144 145 /// Attempt more aggressive live range splitting that is guaranteed to make 146 /// progress. This is used for split products that may not be making 147 /// progress. 148 RS_Split2, 149 150 /// Live range will be spilled. No more splitting will be attempted. 151 RS_Spill, 152 153 /// There is nothing more we can do to this live range. Abort compilation 154 /// if it can't be assigned. 155 RS_Done 156 }; 157 158 // Enum CutOffStage to keep a track whether the register allocation failed 159 // because of the cutoffs encountered in last chance recoloring. 160 // Note: This is used as bitmask. New value should be next power of 2. 
161 enum CutOffStage { 162 // No cutoffs encountered 163 CO_None = 0, 164 165 // lcr-max-depth cutoff encountered 166 CO_Depth = 1, 167 168 // lcr-max-interf cutoff encountered 169 CO_Interf = 2 170 }; 171 172 uint8_t CutOffInfo; 173 174#ifndef NDEBUG 175 static const char *const StageName[]; 176#endif 177 178 // RegInfo - Keep additional information about each live range. 179 struct RegInfo { 180 LiveRangeStage Stage; 181 182 // Cascade - Eviction loop prevention. See canEvictInterference(). 183 unsigned Cascade; 184 185 RegInfo() : Stage(RS_New), Cascade(0) {} 186 }; 187 188 IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; 189 190 LiveRangeStage getStage(const LiveInterval &VirtReg) const { 191 return ExtraRegInfo[VirtReg.reg].Stage; 192 } 193 194 void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { 195 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 196 ExtraRegInfo[VirtReg.reg].Stage = Stage; 197 } 198 199 template<typename Iterator> 200 void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { 201 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 202 for (;Begin != End; ++Begin) { 203 unsigned Reg = *Begin; 204 if (ExtraRegInfo[Reg].Stage == RS_New) 205 ExtraRegInfo[Reg].Stage = NewStage; 206 } 207 } 208 209 /// Cost of evicting interference. 210 struct EvictionCost { 211 unsigned BrokenHints; ///< Total number of broken hints. 212 float MaxWeight; ///< Maximum spill weight evicted. 213 214 EvictionCost(): BrokenHints(0), MaxWeight(0) {} 215 216 bool isMax() const { return BrokenHints == ~0u; } 217 218 void setMax() { BrokenHints = ~0u; } 219 220 void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } 221 222 bool operator<(const EvictionCost &O) const { 223 return std::tie(BrokenHints, MaxWeight) < 224 std::tie(O.BrokenHints, O.MaxWeight); 225 } 226 }; 227 228 // splitting state. 
229 std::unique_ptr<SplitAnalysis> SA; 230 std::unique_ptr<SplitEditor> SE; 231 232 /// Cached per-block interference maps 233 InterferenceCache IntfCache; 234 235 /// All basic blocks where the current register has uses. 236 SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints; 237 238 /// Global live range splitting candidate info. 239 struct GlobalSplitCandidate { 240 // Register intended for assignment, or 0. 241 unsigned PhysReg; 242 243 // SplitKit interval index for this candidate. 244 unsigned IntvIdx; 245 246 // Interference for PhysReg. 247 InterferenceCache::Cursor Intf; 248 249 // Bundles where this candidate should be live. 250 BitVector LiveBundles; 251 SmallVector<unsigned, 8> ActiveBlocks; 252 253 void reset(InterferenceCache &Cache, unsigned Reg) { 254 PhysReg = Reg; 255 IntvIdx = 0; 256 Intf.setPhysReg(Cache, Reg); 257 LiveBundles.clear(); 258 ActiveBlocks.clear(); 259 } 260 261 // Set B[i] = C for every live bundle where B[i] was NoCand. 262 unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) { 263 unsigned Count = 0; 264 for (int i = LiveBundles.find_first(); i >= 0; 265 i = LiveBundles.find_next(i)) 266 if (B[i] == NoCand) { 267 B[i] = C; 268 Count++; 269 } 270 return Count; 271 } 272 }; 273 274 /// Candidate info for each PhysReg in AllocationOrder. 275 /// This vector never shrinks, but grows to the size of the largest register 276 /// class. 277 SmallVector<GlobalSplitCandidate, 32> GlobalCand; 278 279 enum : unsigned { NoCand = ~0u }; 280 281 /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to 282 /// NoCand which indicates the stack interval. 283 SmallVector<unsigned, 32> BundleCand; 284 285 /// Callee-save register cost, calculated once per machine function. 286 BlockFrequency CSRCost; 287 288public: 289 RAGreedy(); 290 291 /// Return the pass name. 292 const char* getPassName() const override { 293 return "Greedy Register Allocator"; 294 } 295 296 /// RAGreedy analysis usage. 
297 void getAnalysisUsage(AnalysisUsage &AU) const override; 298 void releaseMemory() override; 299 Spiller &spiller() override { return *SpillerInstance; } 300 void enqueue(LiveInterval *LI) override; 301 LiveInterval *dequeue() override; 302 unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override; 303 304 /// Perform register allocation. 305 bool runOnMachineFunction(MachineFunction &mf) override; 306 307 static char ID; 308 309private: 310 unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &, 311 SmallVirtRegSet &, unsigned = 0); 312 313 bool LRE_CanEraseVirtReg(unsigned) override; 314 void LRE_WillShrinkVirtReg(unsigned) override; 315 void LRE_DidCloneVirtReg(unsigned, unsigned) override; 316 void enqueue(PQueue &CurQueue, LiveInterval *LI); 317 LiveInterval *dequeue(PQueue &CurQueue); 318 319 BlockFrequency calcSpillCost(); 320 bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); 321 void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); 322 void growRegion(GlobalSplitCandidate &Cand); 323 BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); 324 bool calcCompactRegion(GlobalSplitCandidate&); 325 void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); 326 void calcGapWeights(unsigned, SmallVectorImpl<float>&); 327 unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); 328 bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); 329 bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); 330 void evictInterference(LiveInterval&, unsigned, 331 SmallVectorImpl<unsigned>&); 332 bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, 333 SmallLISet &RecoloringCandidates, 334 const SmallVirtRegSet &FixedRegisters); 335 336 unsigned tryAssign(LiveInterval&, AllocationOrder&, 337 SmallVectorImpl<unsigned>&); 338 unsigned tryEvict(LiveInterval&, AllocationOrder&, 339 SmallVectorImpl<unsigned>&, unsigned = ~0u); 340 unsigned 
tryRegionSplit(LiveInterval&, AllocationOrder&, 341 SmallVectorImpl<unsigned>&); 342 /// Calculate cost of region splitting. 343 unsigned calculateRegionSplitCost(LiveInterval &VirtReg, 344 AllocationOrder &Order, 345 BlockFrequency &BestCost, 346 unsigned &NumCands, bool IgnoreCSR); 347 /// Perform region splitting. 348 unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, 349 bool HasCompact, 350 SmallVectorImpl<unsigned> &NewVRegs); 351 /// Check other options before using a callee-saved register for the first 352 /// time. 353 unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, 354 unsigned PhysReg, unsigned &CostPerUseLimit, 355 SmallVectorImpl<unsigned> &NewVRegs); 356 void initializeCSRCost(); 357 unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, 358 SmallVectorImpl<unsigned>&); 359 unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, 360 SmallVectorImpl<unsigned>&); 361 unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, 362 SmallVectorImpl<unsigned>&); 363 unsigned trySplit(LiveInterval&, AllocationOrder&, 364 SmallVectorImpl<unsigned>&); 365 unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, 366 SmallVectorImpl<unsigned> &, 367 SmallVirtRegSet &, unsigned); 368 bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, 369 SmallVirtRegSet &, unsigned); 370}; 371} // end anonymous namespace 372 373char RAGreedy::ID = 0; 374 375#ifndef NDEBUG 376const char *const RAGreedy::StageName[] = { 377 "RS_New", 378 "RS_Assign", 379 "RS_Split", 380 "RS_Split2", 381 "RS_Spill", 382 "RS_Done" 383}; 384#endif 385 386// Hysteresis to use when comparing floats. 387// This helps stabilize decisions based on float comparisons. 
388const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 389 390 391FunctionPass* llvm::createGreedyRegisterAllocator() { 392 return new RAGreedy(); 393} 394 395RAGreedy::RAGreedy(): MachineFunctionPass(ID) { 396 initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); 397 initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); 398 initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); 399 initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); 400 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 401 initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); 402 initializeLiveStacksPass(*PassRegistry::getPassRegistry()); 403 initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); 404 initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); 405 initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); 406 initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); 407 initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); 408 initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); 409} 410 411void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { 412 AU.setPreservesCFG(); 413 AU.addRequired<MachineBlockFrequencyInfo>(); 414 AU.addPreserved<MachineBlockFrequencyInfo>(); 415 AU.addRequired<AliasAnalysis>(); 416 AU.addPreserved<AliasAnalysis>(); 417 AU.addRequired<LiveIntervals>(); 418 AU.addPreserved<LiveIntervals>(); 419 AU.addRequired<SlotIndexes>(); 420 AU.addPreserved<SlotIndexes>(); 421 AU.addRequired<LiveDebugVariables>(); 422 AU.addPreserved<LiveDebugVariables>(); 423 AU.addRequired<LiveStacks>(); 424 AU.addPreserved<LiveStacks>(); 425 AU.addRequired<MachineDominatorTree>(); 426 AU.addPreserved<MachineDominatorTree>(); 427 AU.addRequired<MachineLoopInfo>(); 428 AU.addPreserved<MachineLoopInfo>(); 429 AU.addRequired<VirtRegMap>(); 430 AU.addPreserved<VirtRegMap>(); 431 AU.addRequired<LiveRegMatrix>(); 432 AU.addPreserved<LiveRegMatrix>(); 433 
AU.addRequired<EdgeBundles>(); 434 AU.addRequired<SpillPlacement>(); 435 MachineFunctionPass::getAnalysisUsage(AU); 436} 437 438 439//===----------------------------------------------------------------------===// 440// LiveRangeEdit delegate methods 441//===----------------------------------------------------------------------===// 442 443bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { 444 if (VRM->hasPhys(VirtReg)) { 445 Matrix->unassign(LIS->getInterval(VirtReg)); 446 return true; 447 } 448 // Unassigned virtreg is probably in the priority queue. 449 // RegAllocBase will erase it after dequeueing. 450 return false; 451} 452 453void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { 454 if (!VRM->hasPhys(VirtReg)) 455 return; 456 457 // Register is assigned, put it back on the queue for reassignment. 458 LiveInterval &LI = LIS->getInterval(VirtReg); 459 Matrix->unassign(LI); 460 enqueue(&LI); 461} 462 463void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { 464 // Cloning a register we haven't even heard about yet? Just ignore it. 465 if (!ExtraRegInfo.inBounds(Old)) 466 return; 467 468 // LRE may clone a virtual register because dead code elimination causes it to 469 // be split into connected components. The new components are much smaller 470 // than the original, so they should get a new chance at being assigned. 471 // same stage as the parent. 472 ExtraRegInfo[Old].Stage = RS_Assign; 473 ExtraRegInfo.grow(New); 474 ExtraRegInfo[New] = ExtraRegInfo[Old]; 475} 476 477void RAGreedy::releaseMemory() { 478 SpillerInstance.reset(nullptr); 479 ExtraRegInfo.clear(); 480 GlobalCand.clear(); 481} 482 483void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); } 484 485void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { 486 // Prioritize live ranges by size, assigning larger ranges first. 487 // The queue holds (size, reg) pairs. 
488 const unsigned Size = LI->getSize(); 489 const unsigned Reg = LI->reg; 490 assert(TargetRegisterInfo::isVirtualRegister(Reg) && 491 "Can only enqueue virtual registers"); 492 unsigned Prio; 493 494 ExtraRegInfo.grow(Reg); 495 if (ExtraRegInfo[Reg].Stage == RS_New) 496 ExtraRegInfo[Reg].Stage = RS_Assign; 497 498 if (ExtraRegInfo[Reg].Stage == RS_Split) { 499 // Unsplit ranges that couldn't be allocated immediately are deferred until 500 // everything else has been allocated. 501 Prio = Size; 502 } else { 503 // Giant live ranges fall back to the global assignment heuristic, which 504 // prevents excessive spilling in pathological cases. 505 bool ReverseLocal = TRI->reverseLocalAssignment(); 506 bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() && 507 (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); 508 509 if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && 510 LIS->intervalIsInOneMBB(*LI)) { 511 // Allocate original local ranges in linear instruction order. Since they 512 // are singly defined, this produces optimal coloring in the absence of 513 // global interference and other constraints. 514 if (!ReverseLocal) 515 Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); 516 else { 517 // Allocating bottom up may allow many short LRGs to be assigned first 518 // to one of the cheap registers. This could be much faster for very 519 // large blocks on targets with many physical registers. 520 Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); 521 } 522 } 523 else { 524 // Allocate global and split ranges in long->short order. Long ranges that 525 // don't fit should be spilled (or split) ASAP so they don't create 526 // interference. Mark a bit to prioritize global above local ranges. 527 Prio = (1u << 29) + Size; 528 } 529 // Mark a higher bit to prioritize global and local above RS_Split. 
530 Prio |= (1u << 31); 531 532 // Boost ranges that have a physical register hint. 533 if (VRM->hasKnownPreference(Reg)) 534 Prio |= (1u << 30); 535 } 536 // The virtual register number is a tie breaker for same-sized ranges. 537 // Give lower vreg numbers higher priority to assign them first. 538 CurQueue.push(std::make_pair(Prio, ~Reg)); 539} 540 541LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } 542 543LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { 544 if (CurQueue.empty()) 545 return nullptr; 546 LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); 547 CurQueue.pop(); 548 return LI; 549} 550 551 552//===----------------------------------------------------------------------===// 553// Direct Assignment 554//===----------------------------------------------------------------------===// 555 556/// tryAssign - Try to assign VirtReg to an available register. 557unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, 558 AllocationOrder &Order, 559 SmallVectorImpl<unsigned> &NewVRegs) { 560 Order.rewind(); 561 unsigned PhysReg; 562 while ((PhysReg = Order.next())) 563 if (!Matrix->checkInterference(VirtReg, PhysReg)) 564 break; 565 if (!PhysReg || Order.isHint()) 566 return PhysReg; 567 568 // PhysReg is available, but there may be a better choice. 569 570 // If we missed a simple hint, try to cheaply evict interference from the 571 // preferred register. 572 if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) 573 if (Order.isHint(Hint)) { 574 DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); 575 EvictionCost MaxCost; 576 MaxCost.setBrokenHints(1); 577 if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { 578 evictInterference(VirtReg, Hint, NewVRegs); 579 return Hint; 580 } 581 } 582 583 // Try to evict interference from a cheaper alternative. 584 unsigned Cost = TRI->getCostPerUse(PhysReg); 585 586 // Most registers have 0 additional cost. 
587 if (!Cost) 588 return PhysReg; 589 590 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost 591 << '\n'); 592 unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); 593 return CheapReg ? CheapReg : PhysReg; 594} 595 596 597//===----------------------------------------------------------------------===// 598// Interference eviction 599//===----------------------------------------------------------------------===// 600 601unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { 602 AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); 603 unsigned PhysReg; 604 while ((PhysReg = Order.next())) { 605 if (PhysReg == PrevReg) 606 continue; 607 608 MCRegUnitIterator Units(PhysReg, TRI); 609 for (; Units.isValid(); ++Units) { 610 // Instantiate a "subquery", not to be confused with the Queries array. 611 LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]); 612 if (subQ.checkInterference()) 613 break; 614 } 615 // If no units have interference, break out with the current PhysReg. 616 if (!Units.isValid()) 617 break; 618 } 619 if (PhysReg) 620 DEBUG(dbgs() << "can reassign: " << VirtReg << " from " 621 << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) 622 << '\n'); 623 return PhysReg; 624} 625 626/// shouldEvict - determine if A should evict the assigned live range B. The 627/// eviction policy defined by this function together with the allocation order 628/// defined by enqueue() decides which registers ultimately end up being split 629/// and spilled. 630/// 631/// Cascade numbers are used to prevent infinite loops if this function is a 632/// cyclic relation. 633/// 634/// @param A The live range to be assigned. 635/// @param IsHint True when A is about to be assigned to its preferred 636/// register. 637/// @param B The live range to be evicted. 638/// @param BreaksHint True when B is already assigned to its preferred register. 
639bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, 640 LiveInterval &B, bool BreaksHint) { 641 bool CanSplit = getStage(B) < RS_Spill; 642 643 // Be fairly aggressive about following hints as long as the evictee can be 644 // split. 645 if (CanSplit && IsHint && !BreaksHint) 646 return true; 647 648 if (A.weight > B.weight) { 649 DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); 650 return true; 651 } 652 return false; 653} 654 655/// canEvictInterference - Return true if all interferences between VirtReg and 656/// PhysReg can be evicted. 657/// 658/// @param VirtReg Live range that is about to be assigned. 659/// @param PhysReg Desired register for assignment. 660/// @param IsHint True when PhysReg is VirtReg's preferred register. 661/// @param MaxCost Only look for cheaper candidates and update with new cost 662/// when returning true. 663/// @returns True when interference can be evicted cheaper than MaxCost. 664bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, 665 bool IsHint, EvictionCost &MaxCost) { 666 // It is only possible to evict virtual register interference. 667 if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) 668 return false; 669 670 bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); 671 672 // Find VirtReg's cascade number. This will be unassigned if VirtReg was never 673 // involved in an eviction before. If a cascade number was assigned, deny 674 // evicting anything with the same or a newer cascade number. This prevents 675 // infinite eviction loops. 676 // 677 // This works out so a register without a cascade number is allowed to evict 678 // anything, and it can be evicted by anything. 
679 unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; 680 if (!Cascade) 681 Cascade = NextCascade; 682 683 EvictionCost Cost; 684 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 685 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); 686 // If there is 10 or more interferences, chances are one is heavier. 687 if (Q.collectInterferingVRegs(10) >= 10) 688 return false; 689 690 // Check if any interfering live range is heavier than MaxWeight. 691 for (unsigned i = Q.interferingVRegs().size(); i; --i) { 692 LiveInterval *Intf = Q.interferingVRegs()[i - 1]; 693 assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && 694 "Only expecting virtual register interference from query"); 695 // Never evict spill products. They cannot split or spill. 696 if (getStage(*Intf) == RS_Done) 697 return false; 698 // Once a live range becomes small enough, it is urgent that we find a 699 // register for it. This is indicated by an infinite spill weight. These 700 // urgent live ranges get to evict almost anything. 701 // 702 // Also allow urgent evictions of unspillable ranges from a strictly 703 // larger allocation order. 704 bool Urgent = !VirtReg.isSpillable() && 705 (Intf->isSpillable() || 706 RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) < 707 RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg))); 708 // Only evict older cascades or live ranges without a cascade. 709 unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; 710 if (Cascade <= IntfCascade) { 711 if (!Urgent) 712 return false; 713 // We permit breaking cascades for urgent evictions. It should be the 714 // last resort, though, so make it really expensive. 715 Cost.BrokenHints += 10; 716 } 717 // Would this break a satisfied hint? 718 bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); 719 // Update eviction cost. 
720 Cost.BrokenHints += BreaksHint; 721 Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); 722 // Abort if this would be too expensive. 723 if (!(Cost < MaxCost)) 724 return false; 725 if (Urgent) 726 continue; 727 // Apply the eviction policy for non-urgent evictions. 728 if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) 729 return false; 730 // If !MaxCost.isMax(), then we're just looking for a cheap register. 731 // Evicting another local live range in this case could lead to suboptimal 732 // coloring. 733 if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && 734 !canReassign(*Intf, PhysReg)) { 735 return false; 736 } 737 } 738 } 739 MaxCost = Cost; 740 return true; 741} 742 743/// evictInterference - Evict any interferring registers that prevent VirtReg 744/// from being assigned to Physreg. This assumes that canEvictInterference 745/// returned true. 746void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, 747 SmallVectorImpl<unsigned> &NewVRegs) { 748 // Make sure that VirtReg has a cascade number, and assign that cascade 749 // number to every evicted register. These live ranges than then only be 750 // evicted by a newer cascade, preventing infinite loops. 751 unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; 752 if (!Cascade) 753 Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; 754 755 DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) 756 << " interference: Cascade " << Cascade << '\n'); 757 758 // Collect all interfering virtregs first. 759 SmallVector<LiveInterval*, 8> Intfs; 760 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 761 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); 762 assert(Q.seenAllInterferences() && "Didn't check all interfererences."); 763 ArrayRef<LiveInterval*> IVR = Q.interferingVRegs(); 764 Intfs.append(IVR.begin(), IVR.end()); 765 } 766 767 // Evict them second. This will invalidate the queries. 
// Tail of RAGreedy::evictInterference (the function header is above this
// chunk): unassign each interfering live interval, bump its cascade number,
// and push it on NewVRegs so it gets requeued for allocation.
  for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
    LiveInterval *Intf = Intfs[i];
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg))
      continue;
    Matrix->unassign(*Intf);
    // Cascade numbers may only increase; evicting within the same cascade is
    // only legal when the evictee is spillable and the evictor is not.
    assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraRegInfo[Intf->reg].Cascade = Cascade;
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg);
  }
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg         Currently unassigned virtual register.
/// @param Order           Physregs to try.
/// @param NewVRegs        Evicted virtual registers are appended here.
/// @param CostPerUseLimit Only consider physregs strictly cheaper than this
///                        (~0u means no limit).
/// @return Physreg to assign VirtReg, or 0.
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                            AllocationOrder &Order,
                            SmallVectorImpl<unsigned> &NewVRegs,
                            unsigned CostPerUseLimit) {
  NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);

  // Keep track of the cheapest interference seen so far.
  EvictionCost BestCost;
  BestCost.setMax();
  unsigned BestPhys = 0;
  unsigned OrderLimit = Order.getOrder().size();

  // When we are just looking for a reduced cost per use, don't break any
  // hints, and only evict smaller spill weights.
  if (CostPerUseLimit < ~0u) {
    BestCost.BrokenHints = 0;
    BestCost.MaxWeight = VirtReg.weight;

    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
    unsigned MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost
                   << ", no cheaper registers to be found.\n");
      return 0;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
    }
  }

  Order.rewind();
  while (unsigned PhysReg = Order.next(OrderLimit)) {
    if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
      continue;
    // The first use of a callee-saved register in a function has cost 1.
    // Don't start using a CSR when the CostPerUseLimit is low.
    if (CostPerUseLimit == 1)
      if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
        if (!MRI->isPhysRegUsed(CSR)) {
          DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
                       << PrintReg(CSR, TRI) << '\n');
          continue;
        }

    // canEvictInterference updates BestCost when it finds a cheaper
    // interference pattern, so later iterations compare against it.
    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
      continue;

    // Best so far.
    BestPhys = PhysReg;

    // Stop if the hint can be used.
    if (Order.isHint())
      break;
  }

  if (!BestPhys)
    return 0;

  evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}


//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in Physreg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
        // Interference reaches the block entry: the value must enter on the
        // stack.
        BC.Entry = SpillPlacement::MustSpill, ++Ins;
      else if (Intf.first() < BI.FirstInstr)
        // Interference before the first use: prefer entering on the stack.
        BC.Entry = SpillPlacement::PrefSpill, ++Ins;
      else if (Intf.first() < BI.LastInstr)
        // Interference between uses: a reload is needed either way.
        ++Ins;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
        BC.Exit = SpillPlacement::MustSpill, ++Ins;
      else if (Intf.last() > BI.LastInstr)
        BC.Exit = SpillPlacement::PrefSpill, ++Ins;
      else if (Intf.last() > BI.FirstInstr)
        ++Ins;
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}


/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  // Constraints (BCS) and links (TBS) are buffered and flushed to SpillPlacer
  // in groups of GroupSize to limit stack usage.
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned i = 0; i != Blocks.size(); ++i) {
    unsigned Number = Blocks[i];
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      // No interference: the block only needs a link between its in- and
      // out-bundles.
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(makeArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
      SpillPlacer->addConstraints(Array);
      B = 0;
    }
  }

  // Flush any partially filled groups.
  ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
  SpillPlacer->addConstraints(Array);
  SpillPlacer->addLinks(makeArrayRef(TBS, T));
}

/// growRegion - Iteratively add live-through blocks to Cand's region as long
/// as the spill placer keeps finding new positive bundles on the periphery.
void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  // ActiveBlocks[0..AddedTo) have already been submitted to SpillPlacer.
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  for (;;) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (int i = 0, e = NewBundles.size(); i != e; ++i) {
      unsigned Bundle = NewBundles[i];
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
           I != E; ++I) {
        unsigned Block = *I;
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg)
      addThroughConstraints(Cand.Intf, NewBlocks);
    else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  DEBUG(dbgs() << ", v=" << Visited);
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, 0);

  DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    DEBUG(dbgs() << ", none.\n");
    return false;
  }

  growRegion(Cand);
  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    DEBUG(dbgs() << ", none.\n");
    return false;
  }

  DEBUG({
    for (int i = Cand.LiveBundles.find_first(); i>=0;
         i = Cand.LiveBundles.find_next(i))
      dbgs() << " EB#" << i;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
1086/// 1087BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { 1088 BlockFrequency GlobalCost = 0; 1089 const BitVector &LiveBundles = Cand.LiveBundles; 1090 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1091 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1092 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1093 SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; 1094 bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; 1095 bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; 1096 unsigned Ins = 0; 1097 1098 if (BI.LiveIn) 1099 Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); 1100 if (BI.LiveOut) 1101 Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); 1102 while (Ins--) 1103 GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); 1104 } 1105 1106 for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { 1107 unsigned Number = Cand.ActiveBlocks[i]; 1108 bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; 1109 bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; 1110 if (!RegIn && !RegOut) 1111 continue; 1112 if (RegIn && RegOut) { 1113 // We need double spill code if this block has interference. 1114 Cand.Intf.moveToBlock(Number); 1115 if (Cand.Intf.hasInterference()) { 1116 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1117 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1118 } 1119 continue; 1120 } 1121 // live-in / stack-out or stack-in live-out. 1122 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1123 } 1124 return GlobalCost; 1125} 1126 1127/// splitAroundRegion - Split the current live range around the regions 1128/// determined by BundleCand and GlobalCand. 1129/// 1130/// Before calling this function, GlobalCand and BundleCand must be initialized 1131/// so each bundle is assigned to a valid candidate, or NoCand for the 1132/// stack-bound bundles. 
The shared SA/SE SplitAnalysis and SplitEditor 1133/// objects must be initialized for the current live range, and intervals 1134/// created for the used candidates. 1135/// 1136/// @param LREdit The LiveRangeEdit object handling the current split. 1137/// @param UsedCands List of used GlobalCand entries. Every BundleCand value 1138/// must appear in this list. 1139void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, 1140 ArrayRef<unsigned> UsedCands) { 1141 // These are the intervals created for new global ranges. We may create more 1142 // intervals for local ranges. 1143 const unsigned NumGlobalIntvs = LREdit.size(); 1144 DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); 1145 assert(NumGlobalIntvs && "No global intervals configured"); 1146 1147 // Isolate even single instructions when dealing with a proper sub-class. 1148 // That guarantees register class inflation for the stack interval because it 1149 // is all copies. 1150 unsigned Reg = SA->getParent().reg; 1151 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); 1152 1153 // First handle all the blocks with uses. 
1154 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1155 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1156 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1157 unsigned Number = BI.MBB->getNumber(); 1158 unsigned IntvIn = 0, IntvOut = 0; 1159 SlotIndex IntfIn, IntfOut; 1160 if (BI.LiveIn) { 1161 unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; 1162 if (CandIn != NoCand) { 1163 GlobalSplitCandidate &Cand = GlobalCand[CandIn]; 1164 IntvIn = Cand.IntvIdx; 1165 Cand.Intf.moveToBlock(Number); 1166 IntfIn = Cand.Intf.first(); 1167 } 1168 } 1169 if (BI.LiveOut) { 1170 unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; 1171 if (CandOut != NoCand) { 1172 GlobalSplitCandidate &Cand = GlobalCand[CandOut]; 1173 IntvOut = Cand.IntvIdx; 1174 Cand.Intf.moveToBlock(Number); 1175 IntfOut = Cand.Intf.last(); 1176 } 1177 } 1178 1179 // Create separate intervals for isolated blocks with multiple uses. 1180 if (!IntvIn && !IntvOut) { 1181 DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); 1182 if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) 1183 SE->splitSingleBlock(BI); 1184 continue; 1185 } 1186 1187 if (IntvIn && IntvOut) 1188 SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); 1189 else if (IntvIn) 1190 SE->splitRegInBlock(BI, IntvIn, IntfIn); 1191 else 1192 SE->splitRegOutBlock(BI, IntvOut, IntfOut); 1193 } 1194 1195 // Handle live-through blocks. The relevant live-through blocks are stored in 1196 // the ActiveBlocks list with each candidate. We need to filter out 1197 // duplicates. 
1198 BitVector Todo = SA->getThroughBlocks(); 1199 for (unsigned c = 0; c != UsedCands.size(); ++c) { 1200 ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; 1201 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 1202 unsigned Number = Blocks[i]; 1203 if (!Todo.test(Number)) 1204 continue; 1205 Todo.reset(Number); 1206 1207 unsigned IntvIn = 0, IntvOut = 0; 1208 SlotIndex IntfIn, IntfOut; 1209 1210 unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; 1211 if (CandIn != NoCand) { 1212 GlobalSplitCandidate &Cand = GlobalCand[CandIn]; 1213 IntvIn = Cand.IntvIdx; 1214 Cand.Intf.moveToBlock(Number); 1215 IntfIn = Cand.Intf.first(); 1216 } 1217 1218 unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; 1219 if (CandOut != NoCand) { 1220 GlobalSplitCandidate &Cand = GlobalCand[CandOut]; 1221 IntvOut = Cand.IntvIdx; 1222 Cand.Intf.moveToBlock(Number); 1223 IntfOut = Cand.Intf.last(); 1224 } 1225 if (!IntvIn && !IntvOut) 1226 continue; 1227 SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); 1228 } 1229 } 1230 1231 ++NumGlobalSplits; 1232 1233 SmallVector<unsigned, 8> IntvMap; 1234 SE->finish(&IntvMap); 1235 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); 1236 1237 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1238 unsigned OrigBlocks = SA->getNumLiveBlocks(); 1239 1240 // Sort out the new intervals created by splitting. We get four kinds: 1241 // - Remainder intervals should not be split again. 1242 // - Candidate intervals can be assigned to Cand.PhysReg. 1243 // - Block-local splits are candidates for local splitting. 1244 // - DCE leftovers should go back on the queue. 1245 for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { 1246 LiveInterval &Reg = LIS->getInterval(LREdit.get(i)); 1247 1248 // Ignore old intervals from DCE. 1249 if (getStage(Reg) != RS_New) 1250 continue; 1251 1252 // Remainder interval. Don't try splitting again, spill if it doesn't 1253 // allocate. 
1254 if (IntvMap[i] == 0) { 1255 setStage(Reg, RS_Spill); 1256 continue; 1257 } 1258 1259 // Global intervals. Allow repeated splitting as long as the number of live 1260 // blocks is strictly decreasing. 1261 if (IntvMap[i] < NumGlobalIntvs) { 1262 if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { 1263 DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks 1264 << " blocks as original.\n"); 1265 // Don't allow repeated splitting as a safe guard against looping. 1266 setStage(Reg, RS_Split2); 1267 } 1268 continue; 1269 } 1270 1271 // Other intervals are treated as new. This includes local intervals created 1272 // for blocks with multiple uses, and anything created by DCE. 1273 } 1274 1275 if (VerifyEnabled) 1276 MF->verify(this, "After splitting live range around region"); 1277} 1278 1279unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1280 SmallVectorImpl<unsigned> &NewVRegs) { 1281 unsigned NumCands = 0; 1282 BlockFrequency BestCost; 1283 1284 // Check if we can split this live range around a compact region. 1285 bool HasCompact = calcCompactRegion(GlobalCand.front()); 1286 if (HasCompact) { 1287 // Yes, keep GlobalCand[0] as the compact region candidate. 1288 NumCands = 1; 1289 BestCost = BlockFrequency::getMaxFrequency(); 1290 } else { 1291 // No benefit from the compact region, our fallback will be per-block 1292 // splitting. Make sure we find a solution that is cheaper than spilling. 1293 BestCost = calcSpillCost(); 1294 DEBUG(dbgs() << "Cost of isolating all blocks = "; 1295 MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); 1296 } 1297 1298 unsigned BestCand = 1299 calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, 1300 false/*IgnoreCSR*/); 1301 1302 // No solutions found, fall back to single block splitting. 
1303 if (!HasCompact && BestCand == NoCand) 1304 return 0; 1305 1306 return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); 1307} 1308 1309unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, 1310 AllocationOrder &Order, 1311 BlockFrequency &BestCost, 1312 unsigned &NumCands, 1313 bool IgnoreCSR) { 1314 unsigned BestCand = NoCand; 1315 Order.rewind(); 1316 while (unsigned PhysReg = Order.next()) { 1317 if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) 1318 if (IgnoreCSR && !MRI->isPhysRegUsed(CSR)) 1319 continue; 1320 1321 // Discard bad candidates before we run out of interference cache cursors. 1322 // This will only affect register classes with a lot of registers (>32). 1323 if (NumCands == IntfCache.getMaxCursors()) { 1324 unsigned WorstCount = ~0u; 1325 unsigned Worst = 0; 1326 for (unsigned i = 0; i != NumCands; ++i) { 1327 if (i == BestCand || !GlobalCand[i].PhysReg) 1328 continue; 1329 unsigned Count = GlobalCand[i].LiveBundles.count(); 1330 if (Count < WorstCount) 1331 Worst = i, WorstCount = Count; 1332 } 1333 --NumCands; 1334 GlobalCand[Worst] = GlobalCand[NumCands]; 1335 if (BestCand == NumCands) 1336 BestCand = Worst; 1337 } 1338 1339 if (GlobalCand.size() <= NumCands) 1340 GlobalCand.resize(NumCands+1); 1341 GlobalSplitCandidate &Cand = GlobalCand[NumCands]; 1342 Cand.reset(IntfCache, PhysReg); 1343 1344 SpillPlacer->prepare(Cand.LiveBundles); 1345 BlockFrequency Cost; 1346 if (!addSplitConstraints(Cand.Intf, Cost)) { 1347 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); 1348 continue; 1349 } 1350 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = "; 1351 MBFI->printBlockFreq(dbgs(), Cost)); 1352 if (Cost >= BestCost) { 1353 DEBUG({ 1354 if (BestCand == NoCand) 1355 dbgs() << " worse than no bundles\n"; 1356 else 1357 dbgs() << " worse than " 1358 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; 1359 }); 1360 continue; 1361 } 1362 growRegion(Cand); 1363 1364 SpillPlacer->finish(); 
1365 1366 // No live bundles, defer to splitSingleBlocks(). 1367 if (!Cand.LiveBundles.any()) { 1368 DEBUG(dbgs() << " no bundles.\n"); 1369 continue; 1370 } 1371 1372 Cost += calcGlobalSplitCost(Cand); 1373 DEBUG({ 1374 dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) 1375 << " with bundles"; 1376 for (int i = Cand.LiveBundles.find_first(); i>=0; 1377 i = Cand.LiveBundles.find_next(i)) 1378 dbgs() << " EB#" << i; 1379 dbgs() << ".\n"; 1380 }); 1381 if (Cost < BestCost) { 1382 BestCand = NumCands; 1383 BestCost = Cost; 1384 } 1385 ++NumCands; 1386 } 1387 return BestCand; 1388} 1389 1390unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, 1391 bool HasCompact, 1392 SmallVectorImpl<unsigned> &NewVRegs) { 1393 SmallVector<unsigned, 8> UsedCands; 1394 // Prepare split editor. 1395 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); 1396 SE->reset(LREdit, SplitSpillMode); 1397 1398 // Assign all edge bundles to the preferred candidate, or NoCand. 1399 BundleCand.assign(Bundles->getNumBundles(), NoCand); 1400 1401 // Assign bundles for the best candidate region. 1402 if (BestCand != NoCand) { 1403 GlobalSplitCandidate &Cand = GlobalCand[BestCand]; 1404 if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { 1405 UsedCands.push_back(BestCand); 1406 Cand.IntvIdx = SE->openIntv(); 1407 DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in " 1408 << B << " bundles, intv " << Cand.IntvIdx << ".\n"); 1409 (void)B; 1410 } 1411 } 1412 1413 // Assign bundles for the compact region. 
1414 if (HasCompact) { 1415 GlobalSplitCandidate &Cand = GlobalCand.front(); 1416 assert(!Cand.PhysReg && "Compact region has no physreg"); 1417 if (unsigned B = Cand.getBundles(BundleCand, 0)) { 1418 UsedCands.push_back(0); 1419 Cand.IntvIdx = SE->openIntv(); 1420 DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " 1421 << Cand.IntvIdx << ".\n"); 1422 (void)B; 1423 } 1424 } 1425 1426 splitAroundRegion(LREdit, UsedCands); 1427 return 0; 1428} 1429 1430 1431//===----------------------------------------------------------------------===// 1432// Per-Block Splitting 1433//===----------------------------------------------------------------------===// 1434 1435/// tryBlockSplit - Split a global live range around every block with uses. This 1436/// creates a lot of local live ranges, that will be split by tryLocalSplit if 1437/// they don't allocate. 1438unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1439 SmallVectorImpl<unsigned> &NewVRegs) { 1440 assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); 1441 unsigned Reg = VirtReg.reg; 1442 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); 1443 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); 1444 SE->reset(LREdit, SplitSpillMode); 1445 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1446 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1447 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1448 if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) 1449 SE->splitSingleBlock(BI); 1450 } 1451 // No blocks were split. 1452 if (LREdit.empty()) 1453 return 0; 1454 1455 // We did split for some blocks. 1456 SmallVector<unsigned, 8> IntvMap; 1457 SE->finish(&IntvMap); 1458 1459 // Tell LiveDebugVariables about the new ranges. 1460 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); 1461 1462 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1463 1464 // Sort out the new intervals created by splitting. 
The remainder interval 1465 // goes straight to spilling, the new local ranges get to stay RS_New. 1466 for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { 1467 LiveInterval &LI = LIS->getInterval(LREdit.get(i)); 1468 if (getStage(LI) == RS_New && IntvMap[i] == 0) 1469 setStage(LI, RS_Spill); 1470 } 1471 1472 if (VerifyEnabled) 1473 MF->verify(this, "After splitting live range around basic blocks"); 1474 return 0; 1475} 1476 1477 1478//===----------------------------------------------------------------------===// 1479// Per-Instruction Splitting 1480//===----------------------------------------------------------------------===// 1481 1482/// Get the number of allocatable registers that match the constraints of \p Reg 1483/// on \p MI and that are also in \p SuperRC. 1484static unsigned getNumAllocatableRegsForConstraints( 1485 const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, 1486 const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, 1487 const RegisterClassInfo &RCI) { 1488 assert(SuperRC && "Invalid register class"); 1489 1490 const TargetRegisterClass *ConstrainedRC = 1491 MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, 1492 /* ExploreBundle */ true); 1493 if (!ConstrainedRC) 1494 return 0; 1495 return RCI.getNumAllocatableRegs(ConstrainedRC); 1496} 1497 1498/// tryInstructionSplit - Split a live range around individual instructions. 1499/// This is normally not worthwhile since the spiller is doing essentially the 1500/// same thing. However, when the live range is in a constrained register 1501/// class, it may help to insert copies such that parts of the live range can 1502/// be moved to a larger register class. 1503/// 1504/// This is similar to spilling to a larger register class. 
unsigned
RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                              SmallVectorImpl<unsigned> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
  // There is no point to this if there are no larger sub-classes.
  if (!RegClassInfo.isProperSubClass(CurRC))
    return 0;

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");

  const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC);
  unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (unsigned i = 0; i != Uses.size(); ++i) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
      if (MI->isFullCopy() ||
          SuperRCNumAllocatableRegs ==
          getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
                                              TRI, RCI)) {
        DEBUG(dbgs() << "    skip:\t" << Uses[i] << '\t' << *MI);
        continue;
      }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
    SlotIndex SegStop  = SE->leaveIntvAfter(Uses[i]);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
  ExtraRegInfo.resize(MRI->getNumVirtRegs());

  // Assign all new registers to RS_Spill. This was the last chance.
  setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}


//===----------------------------------------------------------------------===//
//                             Local Splitting
//===----------------------------------------------------------------------===//


/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
///
void RAGreedy::calcGapWeights(unsigned PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size()-1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
    BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
    BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
          .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
      Matrix->getLiveUnions()[*Units] .find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight;
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap+1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        // Fixed interference can never be evicted, so the gap is unusable.
        GapWeight[Gap] = llvm::huge_valf;
        if (Uses[Gap+1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
      dbgs() << ' ' << Uses[i];
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
                                   Uses.front().getRegSlot()) - RMS.begin();
    unsigned re = RMS.size();
    for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
      // Look for Uses[i] <= RMS <= Uses[i+1].
      assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
      if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
        break;
      DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
      RegMaskGaps.push_back(i);
      // Advance ri to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
        ++ri;
    }
    DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have less instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
    SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
    (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
        GapWeight[RegMaskGaps[i]] = llvm::huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    for (;;) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
                   << Uses[SplitBefore] << '-' << Uses[SplitAfter]
                   << " i=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < llvm::huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
                                 Uses[SplitBefore].distance(Uses[SplitAfter]) +
                                 (LiveBefore + LiveAfter)*SlotIndex::InstrDist);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
              MaxGap = std::max(MaxGap, GapWeight[i]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        DEBUG(dbgs() << " end\n");
        break;
      }

      DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
               << '-' << Uses[BestAfter] << ", " << BestDiff
               << ", " << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);

  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    DEBUG(dbgs() << "Tagging non-progress ranges: ");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
      if (IntvMap[i] == 1) {
        setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
        DEBUG(dbgs() << PrintReg(LREdit.get(i)));
      }
    DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
1874unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, 1875 SmallVectorImpl<unsigned>&NewVRegs) { 1876 // Ranges must be Split2 or less. 1877 if (getStage(VirtReg) >= RS_Spill) 1878 return 0; 1879 1880 // Local intervals are handled separately. 1881 if (LIS->intervalIsInOneMBB(VirtReg)) { 1882 NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); 1883 SA->analyze(&VirtReg); 1884 unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); 1885 if (PhysReg || !NewVRegs.empty()) 1886 return PhysReg; 1887 return tryInstructionSplit(VirtReg, Order, NewVRegs); 1888 } 1889 1890 NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); 1891 1892 SA->analyze(&VirtReg); 1893 1894 // FIXME: SplitAnalysis may repair broken live ranges coming from the 1895 // coalescer. That may cause the range to become allocatable which means that 1896 // tryRegionSplit won't be making progress. This check should be replaced with 1897 // an assertion when the coalescer is fixed. 1898 if (SA->didRepairRange()) { 1899 // VirtReg has changed, so all cached queries are invalid. 1900 Matrix->invalidateVirtRegs(); 1901 if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) 1902 return PhysReg; 1903 } 1904 1905 // First try to split around a region spanning multiple blocks. RS_Split2 1906 // ranges already made dubious progress with region splitting, so they go 1907 // straight to single block splitting. 1908 if (getStage(VirtReg) < RS_Split2) { 1909 unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); 1910 if (PhysReg || !NewVRegs.empty()) 1911 return PhysReg; 1912 } 1913 1914 // Then isolate blocks. 
1915 return tryBlockSplit(VirtReg, Order, NewVRegs); 1916} 1917 1918//===----------------------------------------------------------------------===// 1919// Last Chance Recoloring 1920//===----------------------------------------------------------------------===// 1921 1922/// mayRecolorAllInterferences - Check if the virtual registers that 1923/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be 1924/// recolored to free \p PhysReg. 1925/// When true is returned, \p RecoloringCandidates has been augmented with all 1926/// the live intervals that need to be recolored in order to free \p PhysReg 1927/// for \p VirtReg. 1928/// \p FixedRegisters contains all the virtual registers that cannot be 1929/// recolored. 1930bool 1931RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, 1932 SmallLISet &RecoloringCandidates, 1933 const SmallVirtRegSet &FixedRegisters) { 1934 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); 1935 1936 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 1937 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); 1938 // If there is LastChanceRecoloringMaxInterference or more interferences, 1939 // chances are one would not be recolorable. 1940 if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= 1941 LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { 1942 DEBUG(dbgs() << "Early abort: too many interferences.\n"); 1943 CutOffInfo |= CO_Interf; 1944 return false; 1945 } 1946 for (unsigned i = Q.interferingVRegs().size(); i; --i) { 1947 LiveInterval *Intf = Q.interferingVRegs()[i - 1]; 1948 // If Intf is done and sit on the same register class as VirtReg, 1949 // it would not be recolorable as it is in the same state as VirtReg. 
1950 if ((getStage(*Intf) == RS_Done && 1951 MRI->getRegClass(Intf->reg) == CurRC) || 1952 FixedRegisters.count(Intf->reg)) { 1953 DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n"); 1954 return false; 1955 } 1956 RecoloringCandidates.insert(Intf); 1957 } 1958 } 1959 return true; 1960} 1961 1962/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring 1963/// its interferences. 1964/// Last chance recoloring chooses a color for \p VirtReg and recolors every 1965/// virtual register that was using it. The recoloring process may recursively 1966/// use the last chance recoloring. Therefore, when a virtual register has been 1967/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot 1968/// be last-chance-recolored again during this recoloring "session". 1969/// E.g., 1970/// Let 1971/// vA can use {R1, R2 } 1972/// vB can use { R2, R3} 1973/// vC can use {R1 } 1974/// Where vA, vB, and vC cannot be split anymore (they are reloads for 1975/// instance) and they all interfere. 1976/// 1977/// vA is assigned R1 1978/// vB is assigned R2 1979/// vC tries to evict vA but vA is already done. 1980/// Regular register allocation fails. 1981/// 1982/// Last chance recoloring kicks in: 1983/// vC does as if vA was evicted => vC uses R1. 1984/// vC is marked as fixed. 1985/// vA needs to find a color. 1986/// None are available. 1987/// vA cannot evict vC: vC is a fixed virtual register now. 1988/// vA does as if vB was evicted => vA uses R2. 1989/// vB needs to find a color. 1990/// R3 is available. 1991/// Recoloring => vC = R1, vA = R2, vB = R3 1992/// 1993/// \p Order defines the preferred allocation order for \p VirtReg. 1994/// \p NewRegs will contain any new virtual register that have been created 1995/// (split, spill) during the process and that must be assigned. 1996/// \p FixedRegisters contains all the virtual registers that cannot be 1997/// recolored. 
1998/// \p Depth gives the current depth of the last chance recoloring. 1999/// \return a physical register that can be used for VirtReg or ~0u if none 2000/// exists. 2001unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, 2002 AllocationOrder &Order, 2003 SmallVectorImpl<unsigned> &NewVRegs, 2004 SmallVirtRegSet &FixedRegisters, 2005 unsigned Depth) { 2006 DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); 2007 // Ranges must be Done. 2008 assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && 2009 "Last chance recoloring should really be last chance"); 2010 // Set the max depth to LastChanceRecoloringMaxDepth. 2011 // We may want to reconsider that if we end up with a too large search space 2012 // for target with hundreds of registers. 2013 // Indeed, in that case we may want to cut the search space earlier. 2014 if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { 2015 DEBUG(dbgs() << "Abort because max depth has been reached.\n"); 2016 CutOffInfo |= CO_Depth; 2017 return ~0u; 2018 } 2019 2020 // Set of Live intervals that will need to be recolored. 2021 SmallLISet RecoloringCandidates; 2022 // Record the original mapping virtual register to physical register in case 2023 // the recoloring fails. 2024 DenseMap<unsigned, unsigned> VirtRegToPhysReg; 2025 // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in 2026 // this recoloring "session". 2027 FixedRegisters.insert(VirtReg.reg); 2028 2029 Order.rewind(); 2030 while (unsigned PhysReg = Order.next()) { 2031 DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " 2032 << PrintReg(PhysReg, TRI) << '\n'); 2033 RecoloringCandidates.clear(); 2034 VirtRegToPhysReg.clear(); 2035 2036 // It is only possible to recolor virtual register interference. 
2037 if (Matrix->checkInterference(VirtReg, PhysReg) > 2038 LiveRegMatrix::IK_VirtReg) { 2039 DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n"); 2040 2041 continue; 2042 } 2043 2044 // Early give up on this PhysReg if it is obvious we cannot recolor all 2045 // the interferences. 2046 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, 2047 FixedRegisters)) { 2048 DEBUG(dbgs() << "Some inteferences cannot be recolored.\n"); 2049 continue; 2050 } 2051 2052 // RecoloringCandidates contains all the virtual registers that interfer 2053 // with VirtReg on PhysReg (or one of its aliases). 2054 // Enqueue them for recoloring and perform the actual recoloring. 2055 PQueue RecoloringQueue; 2056 for (SmallLISet::iterator It = RecoloringCandidates.begin(), 2057 EndIt = RecoloringCandidates.end(); 2058 It != EndIt; ++It) { 2059 unsigned ItVirtReg = (*It)->reg; 2060 enqueue(RecoloringQueue, *It); 2061 assert(VRM->hasPhys(ItVirtReg) && 2062 "Interferences are supposed to be with allocated vairables"); 2063 2064 // Record the current allocation. 2065 VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); 2066 // unset the related struct. 2067 Matrix->unassign(**It); 2068 } 2069 2070 // Do as if VirtReg was assigned to PhysReg so that the underlying 2071 // recoloring has the right information about the interferes and 2072 // available colors. 2073 Matrix->assign(VirtReg, PhysReg); 2074 2075 // Save the current recoloring state. 2076 // If we cannot recolor all the interferences, we will have to start again 2077 // at this point for the next physical register. 2078 SmallVirtRegSet SaveFixedRegisters(FixedRegisters); 2079 if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters, 2080 Depth)) { 2081 // Do not mess up with the global assignment process. 2082 // I.e., VirtReg must be unassigned. 
2083 Matrix->unassign(VirtReg); 2084 return PhysReg; 2085 } 2086 2087 DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " 2088 << PrintReg(PhysReg, TRI) << '\n'); 2089 2090 // The recoloring attempt failed, undo the changes. 2091 FixedRegisters = SaveFixedRegisters; 2092 Matrix->unassign(VirtReg); 2093 2094 for (SmallLISet::iterator It = RecoloringCandidates.begin(), 2095 EndIt = RecoloringCandidates.end(); 2096 It != EndIt; ++It) { 2097 unsigned ItVirtReg = (*It)->reg; 2098 if (VRM->hasPhys(ItVirtReg)) 2099 Matrix->unassign(**It); 2100 Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]); 2101 } 2102 } 2103 2104 // Last chance recoloring did not worked either, give up. 2105 return ~0u; 2106} 2107 2108/// tryRecoloringCandidates - Try to assign a new color to every register 2109/// in \RecoloringQueue. 2110/// \p NewRegs will contain any new virtual register created during the 2111/// recoloring process. 2112/// \p FixedRegisters[in/out] contains all the registers that have been 2113/// recolored. 2114/// \return true if all virtual registers in RecoloringQueue were successfully 2115/// recolored, false otherwise. 
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
                                       SmallVectorImpl<unsigned> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       unsigned Depth) {
  while (!RecoloringQueue.empty()) {
    LiveInterval *LI = dequeue(RecoloringQueue);
    DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
    unsigned PhysReg;
    PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
    // ~0u means the recursive recoloring gave up; 0 means LI was deferred,
    // split, or spilled rather than assigned. Either way this queue entry did
    // not receive a color, so the whole recoloring attempt fails.
    if (PhysReg == ~0u || !PhysReg)
      return false;
    DEBUG(dbgs() << "Recoloring of " << *LI
                 << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
    Matrix->assign(*LI, PhysReg);
    // Once recolored, the interval may not be recolored again in this session.
    FixedRegisters.insert(LI->reg);
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                            Main Entry Point
//===----------------------------------------------------------------------===//

/// selectOrSplit - Top-level per-range allocation entry point. Delegates to
/// selectOrSplitImpl and, when allocation failed (~0U) because a recoloring
/// cutoff was hit, emits a diagnostic suggesting -fexhaustive-register-search.
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                SmallVectorImpl<unsigned> &NewVRegs) {
  CutOffInfo = CO_None;
  LLVMContext &Ctx = MF->getFunction()->getContext();
  SmallVirtRegSet FixedRegisters;
  unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
  if (Reg == ~0U && (CutOffInfo != CO_None)) {
    // Report which cutoff(s) stopped last-chance recoloring.
    uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
    if (CutOffEncountered == CO_Depth)
      Ctx.emitError("register allocation failed: maximum depth for recoloring "
                    "reached. Use -fexhaustive-register-search to skip "
                    "cutoffs");
    else if (CutOffEncountered == CO_Interf)
      Ctx.emitError("register allocation failed: maximum interference for "
                    "recoloring reached. Use -fexhaustive-register-search "
                    "to skip cutoffs");
    else if (CutOffEncountered == (CO_Depth | CO_Interf))
      Ctx.emitError("register allocation failed: maximum interference and "
                    "depth for recoloring reached. Use "
                    "-fexhaustive-register-search to skip cutoffs");
  }
  return Reg;
}

/// Using a CSR for the first time has a cost because it causes push|pop
/// to be added to prologue|epilogue. Splitting a cold section of the live
/// range can have lower cost than using the CSR for the first time;
/// Spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise return 0.
unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
                                         AllocationOrder &Order,
                                         unsigned PhysReg,
                                         unsigned &CostPerUseLimit,
                                         SmallVectorImpl<unsigned> &NewVRegs) {
  if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
    // We choose spill over using the CSR for the first time if the spill cost
    // is lower than CSRCost.
    SA->analyze(&VirtReg);
    if (calcSpillCost() >= CSRCost)
      return PhysReg;

    // We are going to spill, set CostPerUseLimit to 1 to make sure that
    // we will not use a callee-saved register in tryEvict.
    CostPerUseLimit = 1;
    return 0;
  }
  if (getStage(VirtReg) < RS_Split) {
    // We choose pre-splitting over using the CSR for the first time if
    // the cost of splitting is lower than CSRCost.
    SA->analyze(&VirtReg);
    unsigned NumCands = 0;
    BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
    unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                                 NumCands, true /*IgnoreCSR*/);
    if (BestCand == NoCand)
      // Use the CSR if we can't find a region split below CSRCost.
      return PhysReg;

    // Perform the actual pre-splitting.
    doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
    return 0;
  }
  // Range already past the splitting stages: just take the CSR.
  return PhysReg;
}

void RAGreedy::initializeCSRCost() {
  // We use the larger one out of the command-line option and the value
  // reported by TRI.
  CSRCost = BlockFrequency(
      std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
  if (!CSRCost.getFrequency())
    return;

  // Raw cost is relative to Entry == 2^14; scale it appropriately.
  uint64_t ActualEntry = MBFI->getEntryFreq();
  if (!ActualEntry) {
    // No entry frequency information: disable the CSR heuristic entirely.
    CSRCost = 0;
    return;
  }
  uint64_t FixedEntry = 1 << 14;
  if (ActualEntry < FixedEntry)
    CSRCost *= BranchProbability(ActualEntry, FixedEntry);
  else if (ActualEntry <= UINT32_MAX)
    // Invert the fraction and divide.
    CSRCost /= BranchProbability(FixedEntry, ActualEntry);
  else
    // Can't use BranchProbability in general, since it takes 32-bit numbers.
    CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}

/// selectOrSplitImpl - Allocation driver for a single live range. Tries, in
/// order: direct assignment (with the CSR first-use heuristic), eviction of a
/// less worthy range, deferring new ranges to a second round, last-chance
/// recoloring for ranges that can no longer be split or spilled, splitting,
/// and finally spilling.
/// \return the chosen physreg; 0 when VirtReg was deferred, split, or spilled
/// (any new ranges are in \p NewVRegs); ~0u when last-chance recoloring
/// failed.
unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
                                     SmallVectorImpl<unsigned> &NewVRegs,
                                     SmallVirtRegSet &FixedRegisters,
                                     unsigned Depth) {
  unsigned CostPerUseLimit = ~0u;
  // First try assigning a free register.
  AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
    // We check other options if we are using a CSR for the first time.
    bool CSRFirstUse = false;
    if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
      if (!MRI->isPhysRegUsed(CSR))
        CSRFirstUse = true;

    // When NewVRegs is not empty, we may have made decisions such as evicting
    // a virtual register, go with the earlier decisions and use the physical
    // register.
    if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) {
      unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
                                              CostPerUseLimit, NewVRegs);
      if (CSRReg || !NewVRegs.empty())
        // Return now if we decide to use a CSR or create new vregs due to
        // pre-splitting.
        return CSRReg;
    } else
      return PhysReg;
  }

  LiveRangeStage Stage = getStage(VirtReg);
  DEBUG(dbgs() << StageName[Stage]
               << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');

  // Try to evict a less worthy live range, but only for ranges from the primary
  // queue. The RS_Split ranges already failed to do this, and they should not
  // get a second chance until they have been split.
  if (Stage != RS_Split)
    if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit))
      return PhysReg;

  assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  if (Stage < RS_Split) {
    setStage(VirtReg, RS_Split);
    DEBUG(dbgs() << "wait for second round\n");
    // Re-queue VirtReg for the second round.
    NewVRegs.push_back(VirtReg.reg);
    return 0;
  }

  // If we couldn't allocate a register from spilling, there is probably some
  // invalid inline assembly. The base class will report it.
  if (Stage >= RS_Done || !VirtReg.isSpillable())
    return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
                                   Depth);

  // Try splitting VirtReg or interferences.
  unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
  if (PhysReg || !NewVRegs.empty())
    return PhysReg;

  // Finally spill VirtReg itself.
  NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
  LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  spiller().spill(LRE);
  // Spilled ranges (and their siblings) are terminal: never split them again.
  setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

  if (VerifyEnabled)
    MF->verify(this, "After spilling");

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return 0;
}

/// runOnMachineFunction - Pass entry point: gather the required analyses,
/// initialize the allocator state, and run the greedy allocation loop.
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
               << "********** Function: " << mf.getName() << '\n');

  MF = &mf;
  TRI = MF->getTarget().getRegisterInfo();
  TII = MF->getTarget().getInstrInfo();
  RCI.runOnMachineFunction(mf);
  if (VerifyEnabled)
    MF->verify(this, "Before greedy register allocator");

  RegAllocBase::init(getAnalysis<VirtRegMap>(),
                     getAnalysis<LiveIntervals>(),
                     getAnalysis<LiveRegMatrix>());
  Indexes = &getAnalysis<SlotIndexes>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  DomTree = &getAnalysis<MachineDominatorTree>();
  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
  Loops = &getAnalysis<MachineLoopInfo>();
  Bundles = &getAnalysis<EdgeBundles>();
  SpillPlacer = &getAnalysis<SpillPlacement>();
  DebugVars = &getAnalysis<LiveDebugVariables>();

  initializeCSRCost();

  calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI);

  DEBUG(LIS->dump());

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI));
  // Per-vreg stage/cascade info; reset for this function.
  ExtraRegInfo.clear();
  ExtraRegInfo.resize(MRI->getNumVirtRegs());
  NextCascade = 1;
  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
  GlobalCand.resize(32);  // This will grow as needed.

  allocatePhysRegs();
  releaseMemory();
  return true;
}