IceRegAlloc.cpp revision ec3f56532be1792d04ed470221df663bb8ca9c19
//===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the LinearScan class, which performs the linear-scan
/// register allocation after liveness analysis has been performed.
///
//===----------------------------------------------------------------------===//
151d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish
164883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin#include "IceRegAlloc.h"
171d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish
181d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish#include "IceCfg.h"
191d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish#include "IceCfgNode.h"
201d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish#include "IceInst.h"
211d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish#include "IceInstVarIter.h"
221d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish#include "IceOperand.h"
231d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish#include "IceTargetLowering.h"
241d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish
251d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfishnamespace Ice {
261d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish
274883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartinnamespace {
284883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin
291d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish// Returns true if Var has any definitions within Item's live range.
304883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin// TODO(stichnot): Consider trimming the Definitions list similar to how the
314883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin// live ranges are trimmed, since all the overlapsDefs() tests are whether some
324883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin// variable's definitions overlap Cur, and trimming is with respect Cur.start.
334883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin// Initial tests show no measurable performance difference, so we'll keep the
344883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin// code simple for now.
354883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartinbool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) {
361d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  constexpr bool UseTrimmed = true;
371d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  VariablesMetadata *VMetadata = Func->getVMetadata();
381d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
391d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed))
401d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish      return true;
411d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  const InstDefList &Defs = VMetadata->getLatterDefinitions(Var);
421d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  for (size_t i = 0; i < Defs.size(); ++i) {
431d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    if (Item->getLiveRange().overlapsInst(Defs[i]->getNumber(), UseTrimmed))
441d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish      return true;
451d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  }
461d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  return false;
471d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish}
481d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish
491d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfishvoid dumpDisableOverlap(const Cfg *Func, const Variable *Var,
501d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish                        const char *Reason) {
511d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  if (!BuildDefs::dump())
521d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    return;
531d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  if (Func->isVerbose(IceV_LinearScan)) {
541d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    VariablesMetadata *VMetadata = Func->getVMetadata();
551d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    Ostream &Str = Func->getContext()->getStrDump();
561d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    Str << "Disabling Overlap due to " << Reason << " " << *Var
571d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish        << " LIVE=" << Var->getLiveRange() << " Defs=";
581d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
591d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish      Str << FirstDef->getNumber();
601d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    const InstDefList &Defs = VMetadata->getLatterDefinitions(Var);
611d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    for (size_t i = 0; i < Defs.size(); ++i) {
621d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish      Str << "," << Defs[i]->getNumber();
631d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    }
641d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish    Str << "\n";
651d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  }
661d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish}
671d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish
684883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartinvoid dumpLiveRange(const Variable *Var, const Cfg *Func) {
694883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin  if (!BuildDefs::dump())
704883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin    return;
714883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin  Ostream &Str = Func->getContext()->getStrDump();
724883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin  char buf[30];
734883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin  snprintf(buf, llvm::array_lengthof(buf), "%2d", Var->getRegNumTmp());
741d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  Str << "R=" << buf << "  V=";
751d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  Var->dump(Func);
761d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  Str << "  Range=" << Var->getLiveRange();
771d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish}
781d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish
791d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish} // end of anonymous namespace
804883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin
811d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfishLinearScan::LinearScan(Cfg *Func)
82a534d7148079f71f932e963d836c731559491021oliviermartin    : Func(Func), Ctx(Func->getContext()),
831d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish      Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {}
841d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish
851d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish// Prepare for full register allocation of all variables.  We depend on
861d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish// liveness analysis to have calculated live ranges.
871d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfishvoid LinearScan::initForGlobal() {
881d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  TimerMarker T(TimerStack::TT_initUnhandled, Func);
891d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  FindPreference = true;
901d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  // For full register allocation, normally we want to enable FindOverlap
911d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  // (meaning we look for opportunities for two overlapping live ranges to
921d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  // safely share the same register).  However, we disable it for phi-lowering
931d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  // register allocation since no overlap opportunities should be available and
941d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  // it's more expensive to look for opportunities.
951d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish  FindOverlap = (Kind != RAK_Phi);
96  const VarList &Vars = Func->getVariables();
97  Unhandled.reserve(Vars.size());
98  UnhandledPrecolored.reserve(Vars.size());
99  // Gather the live ranges of all variables and add them to the Unhandled set.
100  for (Variable *Var : Vars) {
101    // Explicitly don't consider zero-weight variables, which are meant to be
102    // spill slots.
103    if (Var->mustNotHaveReg())
104      continue;
105    // Don't bother if the variable has a null live range, which means it was
106    // never referenced.
107    if (Var->getLiveRange().isEmpty())
108      continue;
109    Var->untrimLiveRange();
110    Unhandled.push_back(Var);
111    if (Var->hasReg()) {
112      Var->setRegNumTmp(Var->getRegNum());
113      Var->setMustHaveReg();
114      UnhandledPrecolored.push_back(Var);
115    }
116  }
117
118  // Build the (ordered) list of FakeKill instruction numbers.
119  Kills.clear();
120  // Phi lowering should not be creating new call instructions, so there should
121  // be no infinite-weight not-yet-colored live ranges that span a call
122  // instruction, hence no need to construct the Kills list.
123  if (Kind == RAK_Phi)
124    return;
125  for (CfgNode *Node : Func->getNodes()) {
126    for (Inst &I : Node->getInsts()) {
127      if (auto Kill = llvm::dyn_cast<InstFakeKill>(&I)) {
128        if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted())
129          Kills.push_back(I.getNumber());
130      }
131    }
132  }
133}
134
135// Prepare for very simple register allocation of only infinite-weight
136// Variables while respecting pre-colored Variables. Some properties we take
137// advantage of:
138//
139// * Live ranges of interest consist of a single segment.
140//
141// * Live ranges of interest never span a call instruction.
142//
143// * Phi instructions are not considered because either phis have already been
144//   lowered, or they don't contain any pre-colored or infinite-weight
145//   Variables.
146//
147// * We don't need to renumber instructions before computing live ranges
148//   because all the high-level ICE instructions are deleted prior to lowering,
149//   and the low-level instructions are added in monotonically increasing order.
150//
151// * There are no opportunities for register preference or allowing overlap.
152//
153// Some properties we aren't (yet) taking advantage of:
154//
155// * Because live ranges are a single segment, the Inactive set will always be
156//   empty, and the live range trimming operation is unnecessary.
157//
158// * Calculating overlap of single-segment live ranges could be optimized a
159//   bit.
160void LinearScan::initForInfOnly() {
161  TimerMarker T(TimerStack::TT_initUnhandled, Func);
162  FindPreference = false;
163  FindOverlap = false;
164  SizeT NumVars = 0;
165  const VarList &Vars = Func->getVariables();
166
167  // Iterate across all instructions and record the begin and end of the live
168  // range for each variable that is pre-colored or infinite weight.
169  std::vector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
170  std::vector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
171  for (CfgNode *Node : Func->getNodes()) {
172    for (Inst &Inst : Node->getInsts()) {
173      if (Inst.isDeleted())
174        continue;
175      if (const Variable *Var = Inst.getDest()) {
176        if (!Var->getIgnoreLiveness() &&
177            (Var->hasReg() || Var->mustHaveReg())) {
178          if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) {
179            LRBegin[Var->getIndex()] = Inst.getNumber();
180            ++NumVars;
181          }
182        }
183      }
184      FOREACH_VAR_IN_INST(Var, Inst) {
185        if (Var->getIgnoreLiveness())
186          continue;
187        if (Var->hasReg() || Var->mustHaveReg())
188          LREnd[Var->getIndex()] = Inst.getNumber();
189      }
190    }
191  }
192
193  Unhandled.reserve(NumVars);
194  UnhandledPrecolored.reserve(NumVars);
195  for (SizeT i = 0; i < Vars.size(); ++i) {
196    Variable *Var = Vars[i];
197    if (LRBegin[i] != Inst::NumberSentinel) {
198      assert(LREnd[i] != Inst::NumberSentinel);
199      Unhandled.push_back(Var);
200      Var->resetLiveRange();
201      Var->addLiveRange(LRBegin[i], LREnd[i]);
202      Var->untrimLiveRange();
203      if (Var->hasReg()) {
204        Var->setRegNumTmp(Var->getRegNum());
205        Var->setMustHaveReg();
206        UnhandledPrecolored.push_back(Var);
207      }
208      --NumVars;
209    }
210  }
211  // This isn't actually a fatal condition, but it would be nice to know if we
212  // somehow pre-calculated Unhandled's size wrong.
213  assert(NumVars == 0);
214
215  // Don't build up the list of Kills because we know that no infinite-weight
216  // Variable has a live range spanning a call.
217  Kills.clear();
218}
219
220void LinearScan::init(RegAllocKind Kind) {
221  this->Kind = Kind;
222  Unhandled.clear();
223  UnhandledPrecolored.clear();
224  Handled.clear();
225  Inactive.clear();
226  Active.clear();
227
228  switch (Kind) {
229  case RAK_Unknown:
230    llvm::report_fatal_error("Invalid RAK_Unknown");
231    break;
232  case RAK_Global:
233  case RAK_Phi:
234    initForGlobal();
235    break;
236  case RAK_InfOnly:
237    initForInfOnly();
238    break;
239  }
240
241  auto CompareRanges = [](const Variable *L, const Variable *R) {
242    InstNumberT Lstart = L->getLiveRange().getStart();
243    InstNumberT Rstart = R->getLiveRange().getStart();
244    if (Lstart == Rstart)
245      return L->getIndex() < R->getIndex();
246    return Lstart < Rstart;
247  };
248  // Do a reverse sort so that erasing elements (from the end) is fast.
249  std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges);
250  std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(),
251            CompareRanges);
252
253  Handled.reserve(Unhandled.size());
254  Inactive.reserve(Unhandled.size());
255  Active.reserve(Unhandled.size());
256}
257
258// This is called when Cur must be allocated a register but no registers are
259// available across Cur's live range.  To handle this, we find a register that
260// is not explicitly used during Cur's live range, spill that register to a
261// stack location right before Cur's live range begins, and fill (reload) the
262// register from the stack location right after Cur's live range ends.
263void LinearScan::addSpillFill(IterationState &Iter) {
264  // Identify the actual instructions that begin and end Iter.Cur's live range.
265  // Iterate through Iter.Cur's node's instruction list until we find the actual
266  // instructions with instruction numbers corresponding to Iter.Cur's recorded
267  // live range endpoints.  This sounds inefficient but shouldn't be a problem
268  // in practice because:
269  // (1) This function is almost never called in practice.
270  // (2) Since this register over-subscription problem happens only for
271  //     phi-lowered instructions, the number of instructions in the node is
272  //     proportional to the number of phi instructions in the original node,
273  //     which is never very large in practice.
274  // (3) We still have to iterate through all instructions of Iter.Cur's live
275  //     range to find all explicitly used registers (though the live range is
276  //     usually only 2-3 instructions), so the main cost that could be avoided
277  //     would be finding the instruction that begin's Iter.Cur's live range.
278  assert(!Iter.Cur->getLiveRange().isEmpty());
279  InstNumberT Start = Iter.Cur->getLiveRange().getStart();
280  InstNumberT End = Iter.Cur->getLiveRange().getEnd();
281  CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur);
282  assert(Node);
283  InstList &Insts = Node->getInsts();
284  InstList::iterator SpillPoint = Insts.end();
285  InstList::iterator FillPoint = Insts.end();
286  // Stop searching after we have found both the SpillPoint and the FillPoint.
287  for (auto I = Insts.begin(), E = Insts.end();
288       I != E && (SpillPoint == E || FillPoint == E); ++I) {
289    if (I->getNumber() == Start)
290      SpillPoint = I;
291    if (I->getNumber() == End)
292      FillPoint = I;
293    if (SpillPoint != E) {
294      // Remove from RegMask any physical registers referenced during Cur's live
295      // range.  Start looking after SpillPoint gets set, i.e. once Cur's live
296      // range begins.
297      FOREACH_VAR_IN_INST(Var, *I) {
298        if (Var->hasRegTmp())
299          Iter.RegMask[Var->getRegNumTmp()] = false;
300      }
301    }
302  }
303  assert(SpillPoint != Insts.end());
304  assert(FillPoint != Insts.end());
305  ++FillPoint;
306  // TODO(stichnot): Randomize instead of find_first().
307  int32_t RegNum = Iter.RegMask.find_first();
308  assert(RegNum != -1);
309  Iter.Cur->setRegNumTmp(RegNum);
310  TargetLowering *Target = Func->getTarget();
311  Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType());
312  // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc
313  // is correctly identified as !isMultiBlock(), reducing stack frame size.
314  Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType());
315  // Add "reg=FakeDef;spill=reg" before SpillPoint
316  Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg));
317  Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg));
318  // add "reg=spill;FakeUse(reg)" before FillPoint
319  Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc));
320  Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg));
321}
322
323void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) {
324  for (SizeT I = Active.size(); I > 0; --I) {
325    const SizeT Index = I - 1;
326    Variable *Item = Active[Index];
327    Item->trimLiveRange(Cur->getLiveRange().getStart());
328    bool Moved = false;
329    if (Item->rangeEndsBefore(Cur)) {
330      // Move Item from Active to Handled list.
331      dumpLiveRangeTrace("Expiring     ", Cur);
332      moveItem(Active, Index, Handled);
333      Moved = true;
334    } else if (!Item->rangeOverlapsStart(Cur)) {
335      // Move Item from Active to Inactive list.
336      dumpLiveRangeTrace("Inactivating ", Cur);
337      moveItem(Active, Index, Inactive);
338      Moved = true;
339    }
340    if (Moved) {
341      // Decrement Item from RegUses[].
342      assert(Item->hasRegTmp());
343      int32_t RegNum = Item->getRegNumTmp();
344      --RegUses[RegNum];
345      assert(RegUses[RegNum] >= 0);
346    }
347  }
348}
349
350void LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) {
351  for (SizeT I = Inactive.size(); I > 0; --I) {
352    const SizeT Index = I - 1;
353    Variable *Item = Inactive[Index];
354    Item->trimLiveRange(Cur->getLiveRange().getStart());
355    if (Item->rangeEndsBefore(Cur)) {
356      // Move Item from Inactive to Handled list.
357      dumpLiveRangeTrace("Expiring     ", Cur);
358      moveItem(Inactive, Index, Handled);
359    } else if (Item->rangeOverlapsStart(Cur)) {
360      // Move Item from Inactive to Active list.
361      dumpLiveRangeTrace("Reactivating ", Cur);
362      moveItem(Inactive, Index, Active);
363      // Increment Item in RegUses[].
364      assert(Item->hasRegTmp());
365      int32_t RegNum = Item->getRegNumTmp();
366      assert(RegUses[RegNum] >= 0);
367      ++RegUses[RegNum];
368    }
369  }
370}
371
372// Infer register preference and allowable overlap. Only form a preference when
373// the current Variable has an unambiguous "first" definition. The preference
374// is some source Variable of the defining instruction that either is assigned
375// a register that is currently free, or that is assigned a register that is
376// not free but overlap is allowed. Overlap is allowed when the Variable under
377// consideration is single-definition, and its definition is a simple
378// assignment - i.e., the register gets copied/aliased but is never modified.
379// Furthermore, overlap is only allowed when preferred Variable definition
380// instructions do not appear within the current Variable's live range.
381void LinearScan::findRegisterPreference(IterationState &Iter) {
382  Iter.Prefer = nullptr;
383  Iter.PreferReg = Variable::NoRegister;
384  Iter.AllowOverlap = false;
385
386  if (FindPreference) {
387    VariablesMetadata *VMetadata = Func->getVMetadata();
388    if (const Inst *DefInst = VMetadata->getFirstDefinition(Iter.Cur)) {
389      assert(DefInst->getDest() == Iter.Cur);
390      bool IsAssign = DefInst->isSimpleAssign();
391      bool IsSingleDef = !VMetadata->isMultiDef(Iter.Cur);
392      for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) {
393        // TODO(stichnot): Iterate through the actual Variables of the
394        // instruction, not just the source operands. This could capture Load
395        // instructions, including address mode optimization, for Prefer (but
396        // not for AllowOverlap).
397        if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) {
398          int32_t SrcReg = SrcVar->getRegNumTmp();
399          // Only consider source variables that have (so far) been assigned a
400          // register. That register must be one in the RegMask set, e.g.
401          // don't try to prefer the stack pointer as a result of the stacksave
402          // intrinsic.
403          if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) {
404            if (FindOverlap && !Iter.Free[SrcReg]) {
405              // Don't bother trying to enable AllowOverlap if the register is
406              // already free.
407              Iter.AllowOverlap = IsSingleDef && IsAssign &&
408                                  !overlapsDefs(Func, Iter.Cur, SrcVar);
409            }
410            if (Iter.AllowOverlap || Iter.Free[SrcReg]) {
411              Iter.Prefer = SrcVar;
412              Iter.PreferReg = SrcReg;
413            }
414          }
415        }
416      }
417      if (Verbose && Iter.Prefer) {
418        Ostream &Str = Ctx->getStrDump();
419        Str << "Initial Iter.Prefer=";
420        Iter.Prefer->dump(Func);
421        Str << " R=" << Iter.PreferReg
422            << " LIVE=" << Iter.Prefer->getLiveRange()
423            << " Overlap=" << Iter.AllowOverlap << "\n";
424      }
425    }
426  }
427}
428
429// Remove registers from the Free[] list where an Inactive range overlaps with
430// the current range.
431void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
432  for (const Variable *Item : Inactive) {
433    if (Item->rangeOverlaps(Iter.Cur)) {
434      int32_t RegNum = Item->getRegNumTmp();
435      // Don't assert(Free[RegNum]) because in theory (though probably never in
436      // practice) there could be two inactive variables that were marked with
437      // AllowOverlap.
438      Iter.Free[RegNum] = false;
439      // Disable AllowOverlap if an Inactive variable, which is not Prefer,
440      // shares Prefer's register, and has a definition within Cur's live
441      // range.
442      if (Iter.AllowOverlap && Item != Iter.Prefer &&
443          RegNum == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) {
444        Iter.AllowOverlap = false;
445        dumpDisableOverlap(Func, Item, "Inactive");
446      }
447    }
448  }
449}
450
451// Remove registers from the Free[] list where an Unhandled pre-colored range
452// overlaps with the current range, and set those registers to infinite weight
453// so that they aren't candidates for eviction.  Cur->rangeEndsBefore(Item) is
454// an early exit check that turns a guaranteed O(N^2) algorithm into expected
455// linear complexity.
456void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
457  for (Variable *Item : reverse_range(UnhandledPrecolored)) {
458    assert(Item->hasReg());
459    if (Iter.Cur->rangeEndsBefore(Item))
460      break;
461    if (Item->rangeOverlaps(Iter.Cur)) {
462      int32_t ItemReg = Item->getRegNum(); // Note: not getRegNumTmp()
463      Iter.Weights[ItemReg].setWeight(RegWeight::Inf);
464      Iter.Free[ItemReg] = false;
465      Iter.PrecoloredUnhandledMask[ItemReg] = true;
466      // Disable Iter.AllowOverlap if the preferred register is one of these
467      // pre-colored unhandled overlapping ranges.
468      if (Iter.AllowOverlap && ItemReg == Iter.PreferReg) {
469        Iter.AllowOverlap = false;
470        dumpDisableOverlap(Func, Item, "PrecoloredUnhandled");
471      }
472    }
473  }
474}
475
476void LinearScan::allocatePrecoloredRegister(Variable *Cur) {
477  int32_t RegNum = Cur->getRegNum();
478  // RegNumTmp should have already been set above.
479  assert(Cur->getRegNumTmp() == RegNum);
480  dumpLiveRangeTrace("Precoloring  ", Cur);
481  Active.push_back(Cur);
482  assert(RegUses[RegNum] >= 0);
483  ++RegUses[RegNum];
484  assert(!UnhandledPrecolored.empty());
485  assert(UnhandledPrecolored.back() == Cur);
486  UnhandledPrecolored.pop_back();
487}
488
489void LinearScan::allocatePreferredRegister(IterationState &Iter) {
490  Iter.Cur->setRegNumTmp(Iter.PreferReg);
491  dumpLiveRangeTrace("Preferring   ", Iter.Cur);
492  assert(RegUses[Iter.PreferReg] >= 0);
493  ++RegUses[Iter.PreferReg];
494  Active.push_back(Iter.Cur);
495}
496
497void LinearScan::allocateFreeRegister(IterationState &Iter) {
498  int32_t RegNum = Iter.Free.find_first();
499  Iter.Cur->setRegNumTmp(RegNum);
500  dumpLiveRangeTrace("Allocating   ", Iter.Cur);
501  assert(RegUses[RegNum] >= 0);
502  ++RegUses[RegNum];
503  Active.push_back(Iter.Cur);
504}
505
506void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
507  // Check Active ranges.
508  for (const Variable *Item : Active) {
509    assert(Item->rangeOverlaps(Iter.Cur));
510    int32_t RegNum = Item->getRegNumTmp();
511    assert(Item->hasRegTmp());
512    Iter.Weights[RegNum].addWeight(Item->getWeight(Func));
513  }
514  // Same as above, but check Inactive ranges instead of Active.
515  for (const Variable *Item : Inactive) {
516    int32_t RegNum = Item->getRegNumTmp();
517    assert(Item->hasRegTmp());
518    if (Item->rangeOverlaps(Iter.Cur))
519      Iter.Weights[RegNum].addWeight(Item->getWeight(Func));
520  }
521
522  // All the weights are now calculated. Find the register with smallest
523  // weight.
524  int32_t MinWeightIndex = Iter.RegMask.find_first();
525  // MinWeightIndex must be valid because of the initial RegMask.any() test.
526  assert(MinWeightIndex >= 0);
527  for (SizeT i = MinWeightIndex + 1; i < Iter.Weights.size(); ++i) {
528    if (Iter.RegMask[i] && Iter.Weights[i] < Iter.Weights[MinWeightIndex])
529      MinWeightIndex = i;
530  }
531
532  if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
533    // Cur doesn't have priority over any other live ranges, so don't allocate
534    // any register to it, and move it to the Handled state.
535    Handled.push_back(Iter.Cur);
536    if (Iter.Cur->mustHaveReg()) {
537      if (Kind == RAK_Phi)
538        addSpillFill(Iter);
539      else
540        Func->setError("Unable to find a physical register for an "
541                       "infinite-weight live range");
542    }
543  } else {
544    // Evict all live ranges in Active that register number MinWeightIndex is
545    // assigned to.
546    for (SizeT I = Active.size(); I > 0; --I) {
547      const SizeT Index = I - 1;
548      Variable *Item = Active[Index];
549      if (Item->getRegNumTmp() == MinWeightIndex) {
550        dumpLiveRangeTrace("Evicting     ", Item);
551        --RegUses[MinWeightIndex];
552        assert(RegUses[MinWeightIndex] >= 0);
553        Item->setRegNumTmp(Variable::NoRegister);
554        moveItem(Active, Index, Handled);
555      }
556    }
557    // Do the same for Inactive.
558    for (SizeT I = Inactive.size(); I > 0; --I) {
559      const SizeT Index = I - 1;
560      Variable *Item = Inactive[Index];
561      // Note: The Item->rangeOverlaps(Cur) clause is not part of the
562      // description of AssignMemLoc() in the original paper.  But there
563      // doesn't seem to be any need to evict an inactive live range that
564      // doesn't overlap with the live range currently being considered. It's
565      // especially bad if we would end up evicting an infinite-weight but
566      // currently-inactive live range. The most common situation for this
567      // would be a scratch register kill set for call instructions.
568      if (Item->getRegNumTmp() == MinWeightIndex &&
569          Item->rangeOverlaps(Iter.Cur)) {
570        dumpLiveRangeTrace("Evicting     ", Item);
571        Item->setRegNumTmp(Variable::NoRegister);
572        moveItem(Inactive, Index, Handled);
573      }
574    }
575    // Assign the register to Cur.
576    Iter.Cur->setRegNumTmp(MinWeightIndex);
577    assert(RegUses[MinWeightIndex] >= 0);
578    ++RegUses[MinWeightIndex];
579    Active.push_back(Iter.Cur);
580    dumpLiveRangeTrace("Allocating   ", Iter.Cur);
581  }
582}
583
584void LinearScan::assignFinalRegisters(
585    const llvm::SmallBitVector &RegMaskFull,
586    const llvm::SmallBitVector &PreDefinedRegisters, bool Randomized) {
587  const size_t NumRegisters = RegMaskFull.size();
588  llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters);
589  if (Randomized) {
590    // Create a random number generator for regalloc randomization. Merge
591    // function's sequence and Kind value as the Salt. Because regAlloc() is
592    // called twice under O2, the second time with RAK_Phi, we check
593    // Kind == RAK_Phi to determine the lowest-order bit to make sure the Salt
594    // is different.
595    uint64_t Salt =
596        (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
597    Func->getTarget()->makeRandomRegisterPermutation(
598        Permutation, PreDefinedRegisters | ~RegMaskFull, Salt);
599  }
600
601  // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof)
602  // for each Variable.
603  for (Variable *Item : Handled) {
604    int32_t RegNum = Item->getRegNumTmp();
605    int32_t AssignedRegNum = RegNum;
606
607    if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
608      AssignedRegNum = Permutation[RegNum];
609    }
610    if (Verbose) {
611      Ostream &Str = Ctx->getStrDump();
612      if (!Item->hasRegTmp()) {
613        Str << "Not assigning ";
614        Item->dump(Func);
615        Str << "\n";
616      } else {
617        Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
618                                                    : "Assigning ")
619            << Func->getTarget()->getRegName(AssignedRegNum, IceType_i32)
620            << "(r" << AssignedRegNum << ") to ";
621        Item->dump(Func);
622        Str << "\n";
623      }
624    }
625    Item->setRegNum(AssignedRegNum);
626  }
627}
628
629// Implements the linear-scan algorithm. Based on "Linear Scan Register
630// Allocation in the Context of SSA Form and Register Constraints" by Hanspeter
631// Mössenböck and Michael Pfeiffer,
632// ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. This implementation is
633// modified to take affinity into account and allow two interfering variables
634// to share the same register in certain cases.
635//
636// Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
637// are assigned to Variable::RegNum for each Variable.
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
                      bool Randomized) {
  TimerMarker T(TimerStack::TT_linearScan, Func);
  assert(RegMaskFull.any()); // Sanity check
  if (Verbose)
    Ctx->lockStr();
  Func->resetCurrentNode();
  const size_t NumRegisters = RegMaskFull.size();
  // Registers claimed by pre-colored live ranges. When randomized allocation
  // is enabled, these must not be permuted later in assignFinalRegisters(),
  // so record them up front.
  llvm::SmallBitVector PreDefinedRegisters(NumRegisters);
  if (Randomized) {
    for (Variable *Var : UnhandledPrecolored) {
      PreDefinedRegisters[Var->getRegNum()] = true;
    }
  }

  // Build a LiveRange representing the Kills list.
  LiveRange KillsRange(Kills);
  KillsRange.untrim();

  // Reset the register use count
  RegUses.resize(NumRegisters);
  std::fill(RegUses.begin(), RegUses.end(), 0);

  // Unhandled is already set to all ranges in increasing order of start
  // points.
  assert(Active.empty());
  assert(Inactive.empty());
  assert(Handled.empty());
  // KillsMask is the set of caller-save (scratch) registers, which are
  // clobbered at every point in the Kills list (e.g. across calls).
  const TargetLowering::RegSetMask RegsInclude =
      TargetLowering::RegSet_CallerSave;
  const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
  const llvm::SmallBitVector KillsMask =
      Func->getTarget()->getRegisterSet(RegsInclude, RegsExclude);

  // Allocate memory once outside the loop
  IterationState Iter;
  Iter.Weights.reserve(NumRegisters);
  Iter.PrecoloredUnhandledMask.reserve(NumRegisters);

  // Main loop: pop the live range with the lowest remaining start point off
  // Unhandled and try to give it a register.
  while (!Unhandled.empty()) {
    Iter.Cur = Unhandled.back();
    Unhandled.pop_back();
    dumpLiveRangeTrace("\nConsidering  ", Iter.Cur);
    // Restrict the candidate registers to those valid for Cur's type.
    Iter.RegMask =
        RegMaskFull &
        Func->getTarget()->getRegisterSetForType(Iter.Cur->getType());
    // Drop kill points that precede Cur's start; later overlap tests can then
    // use the trimmed (shorter) range.
    KillsRange.trim(Iter.Cur->getLiveRange().getStart());

    // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
    // that register. Previously processed live ranges would have avoided that
    // register due to it being pre-colored. Future processed live ranges won't
    // evict that register because the live range has infinite weight.
    if (Iter.Cur->hasReg()) {
      allocatePrecoloredRegister(Iter.Cur);
      continue;
    }

    // Retire or deactivate ranges that no longer overlap Cur's start point.
    handleActiveRangeExpiredOrInactive(Iter.Cur);
    handleInactiveRangeExpiredOrReactivated(Iter.Cur);

    // Calculate available registers into Free[].
    Iter.Free = Iter.RegMask;
    for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
      if (RegUses[i] > 0)
        Iter.Free[i] = false;
    }

    findRegisterPreference(Iter);
    filterFreeWithInactiveRanges(Iter);

    // Disable AllowOverlap if an Active variable, which is not Prefer, shares
    // Prefer's register, and has a definition within Cur's live range.
    if (Iter.AllowOverlap) {
      for (const Variable *Item : Active) {
        int32_t RegNum = Item->getRegNumTmp();
        if (Item != Iter.Prefer && RegNum == Iter.PreferReg &&
            overlapsDefs(Func, Iter.Cur, Item)) {
          Iter.AllowOverlap = false;
          dumpDisableOverlap(Func, Item, "Active");
        }
      }
    }

    // Reset per-iteration eviction weights; reserve() above keeps these
    // resize/fill calls allocation-free.
    Iter.Weights.resize(Iter.RegMask.size());
    std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight());

    Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size());
    Iter.PrecoloredUnhandledMask.reset();

    filterFreeWithPrecoloredRanges(Iter);

    // Remove scratch registers from the Free[] list, and mark their Weights[]
    // as infinite, if KillsRange overlaps Cur's live range.
    constexpr bool UseTrimmed = true;
    if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
      Iter.Free.reset(KillsMask);
      for (int i = KillsMask.find_first(); i != -1;
           i = KillsMask.find_next(i)) {
        Iter.Weights[i].setWeight(RegWeight::Inf);
        if (Iter.PreferReg == i)
          Iter.AllowOverlap = false;
      }
    }

    // Print info about physical register availability.
    // (U = use count, F = free, P = precolored-unhandled.)
    if (Verbose) {
      Ostream &Str = Ctx->getStrDump();
      for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
        if (Iter.RegMask[i]) {
          Str << Func->getTarget()->getRegName(i, IceType_i32)
              << "(U=" << RegUses[i] << ",F=" << Iter.Free[i]
              << ",P=" << Iter.PrecoloredUnhandledMask[i] << ") ";
        }
      }
      Str << "\n";
    }

    if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) {
      // First choice: a preferred register that is either free or is allowed
      // to overlap with its linked variable.
      allocatePreferredRegister(Iter);
    } else if (Iter.Free.any()) {
      // Second choice: any free register.
      allocateFreeRegister(Iter);
    } else {
      // Fallback: there are no free registers, so we look for the
      // lowest-weight register and see if Cur has higher weight.
      handleNoFreeRegisters(Iter);
    }
    dump(Func);
  }

  // Move anything Active or Inactive to Handled for easier handling.
  Handled.insert(Handled.end(), Active.begin(), Active.end());
  Active.clear();
  Handled.insert(Handled.end(), Inactive.begin(), Inactive.end());
  Inactive.clear();
  dump(Func);

  assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized);

  // TODO: Consider running register allocation one more time, with infinite
  // registers, for two reasons. First, evicted live ranges get a second chance
  // for a register. Second, it allows coalescing of stack slots. If there is
  // no time budget for the second register allocation run, each unallocated
  // variable just gets its own slot.
  //
  // Another idea for coalescing stack slots is to initialize the Unhandled
  // list with just the unallocated variables, saving time but not offering
  // second-chance opportunities.

  if (Verbose)
    Ctx->unlockStr();
}
792
793// ======================== Dump routines ======================== //
794
795void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) {
796  if (!BuildDefs::dump())
797    return;
798
799  if (Verbose) {
800    Ostream &Str = Ctx->getStrDump();
801    Str << Label;
802    dumpLiveRange(Item, Func);
803    Str << "\n";
804  }
805}
806
807void LinearScan::dump(Cfg *Func) const {
808  if (!BuildDefs::dump())
809    return;
810  if (!Func->isVerbose(IceV_LinearScan))
811    return;
812  Ostream &Str = Func->getContext()->getStrDump();
813  Func->resetCurrentNode();
814  Str << "**** Current regalloc state:\n";
815  Str << "++++++ Handled:\n";
816  for (const Variable *Item : Handled) {
817    dumpLiveRange(Item, Func);
818    Str << "\n";
819  }
820  Str << "++++++ Unhandled:\n";
821  for (const Variable *Item : reverse_range(Unhandled)) {
822    dumpLiveRange(Item, Func);
823    Str << "\n";
824  }
825  Str << "++++++ Active:\n";
826  for (const Variable *Item : Active) {
827    dumpLiveRange(Item, Func);
828    Str << "\n";
829  }
830  Str << "++++++ Inactive:\n";
831  for (const Variable *Item : Inactive) {
832    dumpLiveRange(Item, Func);
833    Str << "\n";
834  }
835}
836
837} // end of namespace Ice
838