// IceRegAlloc.cpp revision ec3f56532be1792d04ed470221df663bb8ca9c19
//===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the LinearScan class, which performs the linear-scan
/// register allocation after liveness analysis has been performed.
///
//===----------------------------------------------------------------------===//

#include "IceRegAlloc.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceInst.h"
#include "IceInstVarIter.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"

namespace Ice {

namespace {

// Returns true if Var has any definitions within Item's live range.
// TODO(stichnot): Consider trimming the Definitions list similar to how the
// live ranges are trimmed, since all the overlapsDefs() tests are whether some
// variable's definitions overlap Cur, and trimming is with respect to
// Cur.start. Initial tests show no measurable performance difference, so
// we'll keep the code simple for now.
354883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartinbool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) { 361d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish constexpr bool UseTrimmed = true; 371d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish VariablesMetadata *VMetadata = Func->getVMetadata(); 381d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) 391d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed)) 401d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish return true; 411d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish const InstDefList &Defs = VMetadata->getLatterDefinitions(Var); 421d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish for (size_t i = 0; i < Defs.size(); ++i) { 431d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish if (Item->getLiveRange().overlapsInst(Defs[i]->getNumber(), UseTrimmed)) 441d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish return true; 451d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish } 461d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish return false; 471d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish} 481d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish 491d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfishvoid dumpDisableOverlap(const Cfg *Func, const Variable *Var, 501d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish const char *Reason) { 511d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish if (!BuildDefs::dump()) 521d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish return; 531d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish if (Func->isVerbose(IceV_LinearScan)) { 541d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish VariablesMetadata *VMetadata = Func->getVMetadata(); 551d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish Ostream &Str = Func->getContext()->getStrDump(); 561d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish Str << "Disabling Overlap due to " << Reason << 
" " << *Var 571d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish << " LIVE=" << Var->getLiveRange() << " Defs="; 581d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) 591d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish Str << FirstDef->getNumber(); 601d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish const InstDefList &Defs = VMetadata->getLatterDefinitions(Var); 611d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish for (size_t i = 0; i < Defs.size(); ++i) { 621d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish Str << "," << Defs[i]->getNumber(); 631d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish } 641d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish Str << "\n"; 651d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish } 661d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish} 671d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish 684883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartinvoid dumpLiveRange(const Variable *Var, const Cfg *Func) { 694883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin if (!BuildDefs::dump()) 704883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin return; 714883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin Ostream &Str = Func->getContext()->getStrDump(); 724883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin char buf[30]; 734883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin snprintf(buf, llvm::array_lengthof(buf), "%2d", Var->getRegNumTmp()); 741d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish Str << "R=" << buf << " V="; 751d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish Var->dump(Func); 761d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish Str << " Range=" << Var->getLiveRange(); 771d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish} 781d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish 791d5d0ae92d95410f20bc6daab7a47e129fb2547aandrewfish} // end of anonymous namespace 804883513061c7bbcb805cd310b82ab6abb546b1aaoliviermartin 
// Cache the GlobalContext and the verbosity flag up front so the hot scan
// loop doesn't have to re-query them.
LinearScan::LinearScan(Cfg *Func)
    : Func(Func), Ctx(Func->getContext()),
      Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {}

// Prepare for full register allocation of all variables. We depend on
// liveness analysis to have calculated live ranges.
void LinearScan::initForGlobal() {
  TimerMarker T(TimerStack::TT_initUnhandled, Func);
  FindPreference = true;
  // For full register allocation, normally we want to enable FindOverlap
  // (meaning we look for opportunities for two overlapping live ranges to
  // safely share the same register). However, we disable it for phi-lowering
  // register allocation since no overlap opportunities should be available and
  // it's more expensive to look for opportunities.
  FindOverlap = (Kind != RAK_Phi);
  const VarList &Vars = Func->getVariables();
  // Reserve for the worst case (every variable unhandled) to avoid reallocs.
  Unhandled.reserve(Vars.size());
  UnhandledPrecolored.reserve(Vars.size());
  // Gather the live ranges of all variables and add them to the Unhandled set.
  for (Variable *Var : Vars) {
    // Explicitly don't consider zero-weight variables, which are meant to be
    // spill slots.
    if (Var->mustNotHaveReg())
      continue;
    // Don't bother if the variable has a null live range, which means it was
    // never referenced.
    if (Var->getLiveRange().isEmpty())
      continue;
    Var->untrimLiveRange();
    Unhandled.push_back(Var);
    if (Var->hasReg()) {
      // Pre-colored variable: seed the tentative register with the fixed one
      // and force it to keep a register for the whole allocation.
      Var->setRegNumTmp(Var->getRegNum());
      Var->setMustHaveReg();
      UnhandledPrecolored.push_back(Var);
    }
  }

  // Build the (ordered) list of FakeKill instruction numbers.
  Kills.clear();
  // Phi lowering should not be creating new call instructions, so there should
  // be no infinite-weight not-yet-colored live ranges that span a call
  // instruction, hence no need to construct the Kills list.
  if (Kind == RAK_Phi)
    return;
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &I : Node->getInsts()) {
      // Only record kills whose FakeKill and linked instruction are both
      // still live.
      if (auto Kill = llvm::dyn_cast<InstFakeKill>(&I)) {
        if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted())
          Kills.push_back(I.getNumber());
      }
    }
  }
}

// Prepare for very simple register allocation of only infinite-weight
// Variables while respecting pre-colored Variables. Some properties we take
// advantage of:
//
// * Live ranges of interest consist of a single segment.
//
// * Live ranges of interest never span a call instruction.
//
// * Phi instructions are not considered because either phis have already been
//   lowered, or they don't contain any pre-colored or infinite-weight
//   Variables.
//
// * We don't need to renumber instructions before computing live ranges
//   because all the high-level ICE instructions are deleted prior to lowering,
//   and the low-level instructions are added in monotonically increasing order.
//
// * There are no opportunities for register preference or allowing overlap.
//
// Some properties we aren't (yet) taking advantage of:
//
// * Because live ranges are a single segment, the Inactive set will always be
//   empty, and the live range trimming operation is unnecessary.
//
// * Calculating overlap of single-segment live ranges could be optimized a
//   bit.
void LinearScan::initForInfOnly() {
  TimerMarker T(TimerStack::TT_initUnhandled, Func);
  FindPreference = false;
  FindOverlap = false;
  SizeT NumVars = 0;
  const VarList &Vars = Func->getVariables();

  // Iterate across all instructions and record the begin and end of the live
  // range for each variable that is pre-colored or infinite weight.
  std::vector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
  std::vector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Inst : Node->getInsts()) {
      if (Inst.isDeleted())
        continue;
      // A dest operand begins (on its first def) the variable's live range.
      if (const Variable *Var = Inst.getDest()) {
        if (!Var->getIgnoreLiveness() &&
            (Var->hasReg() || Var->mustHaveReg())) {
          if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) {
            LRBegin[Var->getIndex()] = Inst.getNumber();
            // NumVars counts distinct ranges discovered; it is consumed (and
            // asserted back to zero) in the loop below.
            ++NumVars;
          }
        }
      }
      // Any use extends the live range's end to this instruction.
      FOREACH_VAR_IN_INST(Var, Inst) {
        if (Var->getIgnoreLiveness())
          continue;
        if (Var->hasReg() || Var->mustHaveReg())
          LREnd[Var->getIndex()] = Inst.getNumber();
      }
    }
  }

  Unhandled.reserve(NumVars);
  UnhandledPrecolored.reserve(NumVars);
  for (SizeT i = 0; i < Vars.size(); ++i) {
    Variable *Var = Vars[i];
    if (LRBegin[i] != Inst::NumberSentinel) {
      assert(LREnd[i] != Inst::NumberSentinel);
      Unhandled.push_back(Var);
      // Rebuild the live range as the single [begin, end] segment found above.
      Var->resetLiveRange();
      Var->addLiveRange(LRBegin[i], LREnd[i]);
      Var->untrimLiveRange();
      if (Var->hasReg()) {
        Var->setRegNumTmp(Var->getRegNum());
        Var->setMustHaveReg();
        UnhandledPrecolored.push_back(Var);
      }
      --NumVars;
    }
  }
  // This isn't actually a fatal condition, but it would be nice to know if we
  // somehow pre-calculated Unhandled's size wrong.
  assert(NumVars == 0);

  // Don't build up the list of Kills because we know that no infinite-weight
  // Variable has a live range spanning a call.
  Kills.clear();
}

// Resets all allocator state, builds the Unhandled/UnhandledPrecolored lists
// appropriate to Kind, and sorts them (in reverse) by increasing live-range
// start so elements can be popped cheaply off the back.
void LinearScan::init(RegAllocKind Kind) {
  this->Kind = Kind;
  Unhandled.clear();
  UnhandledPrecolored.clear();
  Handled.clear();
  Inactive.clear();
  Active.clear();

  switch (Kind) {
  case RAK_Unknown:
    llvm::report_fatal_error("Invalid RAK_Unknown");
    break;
  case RAK_Global:
  case RAK_Phi:
    initForGlobal();
    break;
  case RAK_InfOnly:
    initForInfOnly();
    break;
  }

  // Order by live-range start; break ties by variable index so the sort is
  // deterministic.
  auto CompareRanges = [](const Variable *L, const Variable *R) {
    InstNumberT Lstart = L->getLiveRange().getStart();
    InstNumberT Rstart = R->getLiveRange().getStart();
    if (Lstart == Rstart)
      return L->getIndex() < R->getIndex();
    return Lstart < Rstart;
  };
  // Do a reverse sort so that erasing elements (from the end) is fast.
  std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges);
  std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(),
            CompareRanges);

  Handled.reserve(Unhandled.size());
  Inactive.reserve(Unhandled.size());
  Active.reserve(Unhandled.size());
}

// This is called when Cur must be allocated a register but no registers are
// available across Cur's live range. To handle this, we find a register that
// is not explicitly used during Cur's live range, spill that register to a
// stack location right before Cur's live range begins, and fill (reload) the
// register from the stack location right after Cur's live range ends.
void LinearScan::addSpillFill(IterationState &Iter) {
  // Identify the actual instructions that begin and end Iter.Cur's live range.
  // Iterate through Iter.Cur's node's instruction list until we find the actual
  // instructions with instruction numbers corresponding to Iter.Cur's recorded
  // live range endpoints. This sounds inefficient but shouldn't be a problem
  // in practice because:
  // (1) This function is almost never called in practice.
  // (2) Since this register over-subscription problem happens only for
  //     phi-lowered instructions, the number of instructions in the node is
  //     proportional to the number of phi instructions in the original node,
  //     which is never very large in practice.
  // (3) We still have to iterate through all instructions of Iter.Cur's live
  //     range to find all explicitly used registers (though the live range is
  //     usually only 2-3 instructions), so the main cost that could be avoided
  //     would be finding the instruction that begins Iter.Cur's live range.
  assert(!Iter.Cur->getLiveRange().isEmpty());
  InstNumberT Start = Iter.Cur->getLiveRange().getStart();
  InstNumberT End = Iter.Cur->getLiveRange().getEnd();
  CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur);
  assert(Node);
  InstList &Insts = Node->getInsts();
  InstList::iterator SpillPoint = Insts.end();
  InstList::iterator FillPoint = Insts.end();
  // Stop searching after we have found both the SpillPoint and the FillPoint.
  for (auto I = Insts.begin(), E = Insts.end();
       I != E && (SpillPoint == E || FillPoint == E); ++I) {
    if (I->getNumber() == Start)
      SpillPoint = I;
    if (I->getNumber() == End)
      FillPoint = I;
    if (SpillPoint != E) {
      // Remove from RegMask any physical registers referenced during Cur's live
      // range. Start looking after SpillPoint gets set, i.e. once Cur's live
      // range begins.
      FOREACH_VAR_IN_INST(Var, *I) {
        if (Var->hasRegTmp())
          Iter.RegMask[Var->getRegNumTmp()] = false;
      }
    }
  }
  assert(SpillPoint != Insts.end());
  assert(FillPoint != Insts.end());
  // Insert the fill after (not before) the last instruction of the range.
  ++FillPoint;
  // TODO(stichnot): Randomize instead of find_first().
  int32_t RegNum = Iter.RegMask.find_first();
  assert(RegNum != -1);
  Iter.Cur->setRegNumTmp(RegNum);
  TargetLowering *Target = Func->getTarget();
  Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType());
  // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc
  // is correctly identified as !isMultiBlock(), reducing stack frame size.
  Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType());
  // Add "reg=FakeDef;spill=reg" before SpillPoint
  Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg));
  Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg));
  // Add "reg=spill;FakeUse(reg)" before FillPoint
  Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc));
  Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg));
}

// Walks the Active list (backwards, so moveItem's erase-from-the-end is safe)
// and, relative to Cur's start point, retires ranges that have ended (to
// Handled) or gone into a live-range hole (to Inactive), releasing their
// register use counts.
void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) {
  for (SizeT I = Active.size(); I > 0; --I) {
    const SizeT Index = I - 1;
    Variable *Item = Active[Index];
    Item->trimLiveRange(Cur->getLiveRange().getStart());
    bool Moved = false;
    if (Item->rangeEndsBefore(Cur)) {
      // Move Item from Active to Handled list.
      // NOTE(review): this traces Cur, though Item is the range that is
      // expiring — confirm whether Item was intended here.
      dumpLiveRangeTrace("Expiring ", Cur);
      moveItem(Active, Index, Handled);
      Moved = true;
    } else if (!Item->rangeOverlapsStart(Cur)) {
      // Move Item from Active to Inactive list.
      dumpLiveRangeTrace("Inactivating ", Cur);
      moveItem(Active, Index, Inactive);
      Moved = true;
    }
    if (Moved) {
      // Decrement Item from RegUses[].
      assert(Item->hasRegTmp());
      int32_t RegNum = Item->getRegNumTmp();
      --RegUses[RegNum];
      assert(RegUses[RegNum] >= 0);
    }
  }
}

// Counterpart to handleActiveRangeExpiredOrInactive: retires Inactive ranges
// that have ended, and reactivates (back to Active, re-acquiring a register
// use count) those whose range overlaps Cur's start.
void LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) {
  for (SizeT I = Inactive.size(); I > 0; --I) {
    const SizeT Index = I - 1;
    Variable *Item = Inactive[Index];
    Item->trimLiveRange(Cur->getLiveRange().getStart());
    if (Item->rangeEndsBefore(Cur)) {
      // Move Item from Inactive to Handled list.
      // NOTE(review): traces Cur rather than the expiring Item — confirm.
      dumpLiveRangeTrace("Expiring ", Cur);
      moveItem(Inactive, Index, Handled);
    } else if (Item->rangeOverlapsStart(Cur)) {
      // Move Item from Inactive to Active list.
      dumpLiveRangeTrace("Reactivating ", Cur);
      moveItem(Inactive, Index, Active);
      // Increment Item in RegUses[].
      assert(Item->hasRegTmp());
      int32_t RegNum = Item->getRegNumTmp();
      assert(RegUses[RegNum] >= 0);
      ++RegUses[RegNum];
    }
  }
}

// Infer register preference and allowable overlap. Only form a preference when
// the current Variable has an unambiguous "first" definition. The preference
// is some source Variable of the defining instruction that either is assigned
// a register that is currently free, or that is assigned a register that is
// not free but overlap is allowed. Overlap is allowed when the Variable under
// consideration is single-definition, and its definition is a simple
// assignment - i.e., the register gets copied/aliased but is never modified.
// Furthermore, overlap is only allowed when preferred Variable definition
// instructions do not appear within the current Variable's live range.
381void LinearScan::findRegisterPreference(IterationState &Iter) { 382 Iter.Prefer = nullptr; 383 Iter.PreferReg = Variable::NoRegister; 384 Iter.AllowOverlap = false; 385 386 if (FindPreference) { 387 VariablesMetadata *VMetadata = Func->getVMetadata(); 388 if (const Inst *DefInst = VMetadata->getFirstDefinition(Iter.Cur)) { 389 assert(DefInst->getDest() == Iter.Cur); 390 bool IsAssign = DefInst->isSimpleAssign(); 391 bool IsSingleDef = !VMetadata->isMultiDef(Iter.Cur); 392 for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) { 393 // TODO(stichnot): Iterate through the actual Variables of the 394 // instruction, not just the source operands. This could capture Load 395 // instructions, including address mode optimization, for Prefer (but 396 // not for AllowOverlap). 397 if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) { 398 int32_t SrcReg = SrcVar->getRegNumTmp(); 399 // Only consider source variables that have (so far) been assigned a 400 // register. That register must be one in the RegMask set, e.g. 401 // don't try to prefer the stack pointer as a result of the stacksave 402 // intrinsic. 403 if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) { 404 if (FindOverlap && !Iter.Free[SrcReg]) { 405 // Don't bother trying to enable AllowOverlap if the register is 406 // already free. 407 Iter.AllowOverlap = IsSingleDef && IsAssign && 408 !overlapsDefs(Func, Iter.Cur, SrcVar); 409 } 410 if (Iter.AllowOverlap || Iter.Free[SrcReg]) { 411 Iter.Prefer = SrcVar; 412 Iter.PreferReg = SrcReg; 413 } 414 } 415 } 416 } 417 if (Verbose && Iter.Prefer) { 418 Ostream &Str = Ctx->getStrDump(); 419 Str << "Initial Iter.Prefer="; 420 Iter.Prefer->dump(Func); 421 Str << " R=" << Iter.PreferReg 422 << " LIVE=" << Iter.Prefer->getLiveRange() 423 << " Overlap=" << Iter.AllowOverlap << "\n"; 424 } 425 } 426 } 427} 428 429// Remove registers from the Free[] list where an Inactive range overlaps with 430// the current range. 
void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
  for (const Variable *Item : Inactive) {
    if (Item->rangeOverlaps(Iter.Cur)) {
      int32_t RegNum = Item->getRegNumTmp();
      // Don't assert(Free[RegNum]) because in theory (though probably never in
      // practice) there could be two inactive variables that were marked with
      // AllowOverlap.
      Iter.Free[RegNum] = false;
      // Disable AllowOverlap if an Inactive variable, which is not Prefer,
      // shares Prefer's register, and has a definition within Cur's live
      // range.
      if (Iter.AllowOverlap && Item != Iter.Prefer &&
          RegNum == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) {
        Iter.AllowOverlap = false;
        dumpDisableOverlap(Func, Item, "Inactive");
      }
    }
  }
}

// Remove registers from the Free[] list where an Unhandled pre-colored range
// overlaps with the current range, and set those registers to infinite weight
// so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is
// an early exit check that turns a guaranteed O(N^2) algorithm into expected
// linear complexity.
void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
  // UnhandledPrecolored is reverse-sorted by start, so iterate it in reverse
  // to visit ranges in increasing start order.
  for (Variable *Item : reverse_range(UnhandledPrecolored)) {
    assert(Item->hasReg());
    if (Iter.Cur->rangeEndsBefore(Item))
      break;
    if (Item->rangeOverlaps(Iter.Cur)) {
      int32_t ItemReg = Item->getRegNum(); // Note: not getRegNumTmp()
      Iter.Weights[ItemReg].setWeight(RegWeight::Inf);
      Iter.Free[ItemReg] = false;
      Iter.PrecoloredUnhandledMask[ItemReg] = true;
      // Disable Iter.AllowOverlap if the preferred register is one of these
      // pre-colored unhandled overlapping ranges.
      if (Iter.AllowOverlap && ItemReg == Iter.PreferReg) {
        Iter.AllowOverlap = false;
        dumpDisableOverlap(Func, Item, "PrecoloredUnhandled");
      }
    }
  }
}

// Cur is pre-colored: commit it to its fixed register, move it to Active, and
// pop it off the front (back of the reversed list) of UnhandledPrecolored.
void LinearScan::allocatePrecoloredRegister(Variable *Cur) {
  int32_t RegNum = Cur->getRegNum();
  // RegNumTmp should have already been set above.
  assert(Cur->getRegNumTmp() == RegNum);
  dumpLiveRangeTrace("Precoloring ", Cur);
  Active.push_back(Cur);
  assert(RegUses[RegNum] >= 0);
  ++RegUses[RegNum];
  assert(!UnhandledPrecolored.empty());
  assert(UnhandledPrecolored.back() == Cur);
  UnhandledPrecolored.pop_back();
}

// Give Cur the preferred register found by findRegisterPreference().
void LinearScan::allocatePreferredRegister(IterationState &Iter) {
  Iter.Cur->setRegNumTmp(Iter.PreferReg);
  dumpLiveRangeTrace("Preferring ", Iter.Cur);
  assert(RegUses[Iter.PreferReg] >= 0);
  ++RegUses[Iter.PreferReg];
  Active.push_back(Iter.Cur);
}

// Give Cur the first register still marked free in Iter.Free.
void LinearScan::allocateFreeRegister(IterationState &Iter) {
  int32_t RegNum = Iter.Free.find_first();
  Iter.Cur->setRegNumTmp(RegNum);
  dumpLiveRangeTrace("Allocating ", Iter.Cur);
  assert(RegUses[RegNum] >= 0);
  ++RegUses[RegNum];
  Active.push_back(Iter.Cur);
}

// No register is free across Cur's live range: accumulate per-register
// eviction weights from the Active and Inactive sets, then either evict
// everything assigned to the lowest-weight register (if Cur outweighs it) or
// leave Cur unallocated (spilling/erroring if Cur must have a register).
void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
  // Check Active ranges.
  for (const Variable *Item : Active) {
    assert(Item->rangeOverlaps(Iter.Cur));
    int32_t RegNum = Item->getRegNumTmp();
    assert(Item->hasRegTmp());
    Iter.Weights[RegNum].addWeight(Item->getWeight(Func));
  }
  // Same as above, but check Inactive ranges instead of Active.
  for (const Variable *Item : Inactive) {
    int32_t RegNum = Item->getRegNumTmp();
    assert(Item->hasRegTmp());
    if (Item->rangeOverlaps(Iter.Cur))
      Iter.Weights[RegNum].addWeight(Item->getWeight(Func));
  }

  // All the weights are now calculated. Find the register with smallest
  // weight.
  int32_t MinWeightIndex = Iter.RegMask.find_first();
  // MinWeightIndex must be valid because of the initial RegMask.any() test.
  assert(MinWeightIndex >= 0);
  for (SizeT i = MinWeightIndex + 1; i < Iter.Weights.size(); ++i) {
    if (Iter.RegMask[i] && Iter.Weights[i] < Iter.Weights[MinWeightIndex])
      MinWeightIndex = i;
  }

  if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
    // Cur doesn't have priority over any other live ranges, so don't allocate
    // any register to it, and move it to the Handled state.
    Handled.push_back(Iter.Cur);
    if (Iter.Cur->mustHaveReg()) {
      if (Kind == RAK_Phi)
        addSpillFill(Iter);
      else
        Func->setError("Unable to find a physical register for an "
                       "infinite-weight live range");
    }
  } else {
    // Evict all live ranges in Active that register number MinWeightIndex is
    // assigned to.
    for (SizeT I = Active.size(); I > 0; --I) {
      const SizeT Index = I - 1;
      Variable *Item = Active[Index];
      if (Item->getRegNumTmp() == MinWeightIndex) {
        dumpLiveRangeTrace("Evicting ", Item);
        --RegUses[MinWeightIndex];
        assert(RegUses[MinWeightIndex] >= 0);
        Item->setRegNumTmp(Variable::NoRegister);
        moveItem(Active, Index, Handled);
      }
    }
    // Do the same for Inactive.
    for (SizeT I = Inactive.size(); I > 0; --I) {
      const SizeT Index = I - 1;
      Variable *Item = Inactive[Index];
      // Note: The Item->rangeOverlaps(Cur) clause is not part of the
      // description of AssignMemLoc() in the original paper. But there
      // doesn't seem to be any need to evict an inactive live range that
      // doesn't overlap with the live range currently being considered. It's
      // especially bad if we would end up evicting an infinite-weight but
      // currently-inactive live range. The most common situation for this
      // would be a scratch register kill set for call instructions.
      if (Item->getRegNumTmp() == MinWeightIndex &&
          Item->rangeOverlaps(Iter.Cur)) {
        dumpLiveRangeTrace("Evicting ", Item);
        Item->setRegNumTmp(Variable::NoRegister);
        moveItem(Inactive, Index, Handled);
      }
    }
    // Assign the register to Cur.
    Iter.Cur->setRegNumTmp(MinWeightIndex);
    assert(RegUses[MinWeightIndex] >= 0);
    ++RegUses[MinWeightIndex];
    Active.push_back(Iter.Cur);
    dumpLiveRangeTrace("Allocating ", Iter.Cur);
  }
}

// Commits RegNumTmp (optionally run through a random permutation) into each
// Handled variable's final RegNum.
void LinearScan::assignFinalRegisters(
    const llvm::SmallBitVector &RegMaskFull,
    const llvm::SmallBitVector &PreDefinedRegisters, bool Randomized) {
  const size_t NumRegisters = RegMaskFull.size();
  llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters);
  if (Randomized) {
    // Create a random number generator for regalloc randomization. Merge
    // function's sequence and Kind value as the Salt. Because regAlloc() is
    // called twice under O2, the second time with RAK_Phi, we check
    // Kind == RAK_Phi to determine the lowest-order bit to make sure the Salt
    // is different.
    uint64_t Salt =
        (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
    Func->getTarget()->makeRandomRegisterPermutation(
        Permutation, PreDefinedRegisters | ~RegMaskFull, Salt);
  }

  // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof)
  // for each Variable.
  for (Variable *Item : Handled) {
    int32_t RegNum = Item->getRegNumTmp();
    int32_t AssignedRegNum = RegNum;

    // Pre-colored registers are never permuted; only allocator-chosen ones.
    if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
      AssignedRegNum = Permutation[RegNum];
    }
    if (Verbose) {
      Ostream &Str = Ctx->getStrDump();
      if (!Item->hasRegTmp()) {
        Str << "Not assigning ";
        Item->dump(Func);
        Str << "\n";
      } else {
        Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
                                                    : "Assigning ")
            << Func->getTarget()->getRegName(AssignedRegNum, IceType_i32)
            << "(r" << AssignedRegNum << ") to ";
        Item->dump(Func);
        Str << "\n";
      }
    }
    Item->setRegNum(AssignedRegNum);
  }
}

// Implements the linear-scan algorithm. Based on "Linear Scan Register
// Allocation in the Context of SSA Form and Register Constraints" by Hanspeter
// Mössenböck and Michael Pfeiffer,
// ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. This implementation is
// modified to take affinity into account and allow two interfering variables
// to share the same register in certain cases.
//
// Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
// are assigned to Variable::RegNum for each Variable.
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
                      bool Randomized) {
  TimerMarker T(TimerStack::TT_linearScan, Func);
  assert(RegMaskFull.any()); // Sanity check
  if (Verbose)
    Ctx->lockStr();
  Func->resetCurrentNode();
  const size_t NumRegisters = RegMaskFull.size();
  llvm::SmallBitVector PreDefinedRegisters(NumRegisters);
  if (Randomized) {
    for (Variable *Var : UnhandledPrecolored) {
      PreDefinedRegisters[Var->getRegNum()] = true;
    }
  }

  // Build a LiveRange representing the Kills list.
  LiveRange KillsRange(Kills);
  KillsRange.untrim();

  // Reset the register use count
  RegUses.resize(NumRegisters);
  std::fill(RegUses.begin(), RegUses.end(), 0);

  // Unhandled is already set to all ranges in increasing order of start
  // points.
  assert(Active.empty());
  assert(Inactive.empty());
  assert(Handled.empty());
  const TargetLowering::RegSetMask RegsInclude =
      TargetLowering::RegSet_CallerSave;
  const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
  const llvm::SmallBitVector KillsMask =
      Func->getTarget()->getRegisterSet(RegsInclude, RegsExclude);

  // Allocate memory once outside the loop
  IterationState Iter;
  Iter.Weights.reserve(NumRegisters);
  Iter.PrecoloredUnhandledMask.reserve(NumRegisters);

  while (!Unhandled.empty()) {
    // Unhandled is reverse-sorted, so the back element has the earliest start.
    Iter.Cur = Unhandled.back();
    Unhandled.pop_back();
    dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
    Iter.RegMask =
        RegMaskFull &
        Func->getTarget()->getRegisterSetForType(Iter.Cur->getType());
    KillsRange.trim(Iter.Cur->getLiveRange().getStart());

    // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
    // that register. Previously processed live ranges would have avoided that
    // register due to it being pre-colored. Future processed live ranges won't
    // evict that register because the live range has infinite weight.
    if (Iter.Cur->hasReg()) {
      allocatePrecoloredRegister(Iter.Cur);
      continue;
    }

    handleActiveRangeExpiredOrInactive(Iter.Cur);
    handleInactiveRangeExpiredOrReactivated(Iter.Cur);

    // Calculate available registers into Free[].
    Iter.Free = Iter.RegMask;
    for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
      if (RegUses[i] > 0)
        Iter.Free[i] = false;
    }

    findRegisterPreference(Iter);
    filterFreeWithInactiveRanges(Iter);

    // Disable AllowOverlap if an Active variable, which is not Prefer, shares
    // Prefer's register, and has a definition within Cur's live range.
    if (Iter.AllowOverlap) {
      for (const Variable *Item : Active) {
        int32_t RegNum = Item->getRegNumTmp();
        if (Item != Iter.Prefer && RegNum == Iter.PreferReg &&
            overlapsDefs(Func, Iter.Cur, Item)) {
          Iter.AllowOverlap = false;
          dumpDisableOverlap(Func, Item, "Active");
        }
      }
    }

    Iter.Weights.resize(Iter.RegMask.size());
    std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight());

    Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size());
    Iter.PrecoloredUnhandledMask.reset();

    filterFreeWithPrecoloredRanges(Iter);

    // Remove scratch registers from the Free[] list, and mark their Weights[]
    // as infinite, if KillsRange overlaps Cur's live range.
    constexpr bool UseTrimmed = true;
    if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
      Iter.Free.reset(KillsMask);
      for (int i = KillsMask.find_first(); i != -1;
           i = KillsMask.find_next(i)) {
        Iter.Weights[i].setWeight(RegWeight::Inf);
        if (Iter.PreferReg == i)
          Iter.AllowOverlap = false;
      }
    }

    // Print info about physical register availability.
    if (Verbose) {
      Ostream &Str = Ctx->getStrDump();
      for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
        if (Iter.RegMask[i]) {
          Str << Func->getTarget()->getRegName(i, IceType_i32)
              << "(U=" << RegUses[i] << ",F=" << Iter.Free[i]
              << ",P=" << Iter.PrecoloredUnhandledMask[i] << ") ";
        }
      }
      Str << "\n";
    }

    if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) {
      // First choice: a preferred register that is either free or is allowed
      // to overlap with its linked variable.
      allocatePreferredRegister(Iter);
    } else if (Iter.Free.any()) {
      // Second choice: any free register.
      allocateFreeRegister(Iter);
    } else {
      // Fallback: there are no free registers, so we look for the
      // lowest-weight register and see if Cur has higher weight.
      handleNoFreeRegisters(Iter);
    }
    dump(Func);
  }

  // Move anything Active or Inactive to Handled for easier handling.
  Handled.insert(Handled.end(), Active.begin(), Active.end());
  Active.clear();
  Handled.insert(Handled.end(), Inactive.begin(), Inactive.end());
  Inactive.clear();
  dump(Func);

  assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized);

  // TODO: Consider running register allocation one more time, with infinite
  // registers, for two reasons. First, evicted live ranges get a second chance
  // for a register. Second, it allows coalescing of stack slots. If there is
  // no time budget for the second register allocation run, each unallocated
  // variable just gets its own slot.
  //
  // Another idea for coalescing stack slots is to initialize the Unhandled
  // list with just the unallocated variables, saving time but not offering
  // second-chance opportunities.

  if (Verbose)
    Ctx->unlockStr();
}

// ======================== Dump routines ======================== //

void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) {
  if (!BuildDefs::dump())
    return;

  if (Verbose) {
    Ostream &Str = Ctx->getStrDump();
    Str << Label;
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
}

void LinearScan::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  if (!Func->isVerbose(IceV_LinearScan))
    return;
  Ostream &Str = Func->getContext()->getStrDump();
  Func->resetCurrentNode();
  Str << "**** Current regalloc state:\n";
  Str << "++++++ Handled:\n";
  for (const Variable *Item : Handled) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Unhandled:\n";
  for (const Variable *Item : reverse_range(Unhandled)) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Active:\n";
  for (const Variable *Item : Active) {
dumpLiveRange(Item, Func); 828 Str << "\n"; 829 } 830 Str << "++++++ Inactive:\n"; 831 for (const Variable *Item : Inactive) { 832 dumpLiveRange(Item, Func); 833 Str << "\n"; 834 } 835} 836 837} // end of namespace Ice 838