IceRegAlloc.cpp revision 48e3ae5c62d7e626ed1a0dbbe38a7cc11a356260
1504214c4870e9183418014634268ce630eb5332algao//===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===// 2504214c4870e9183418014634268ce630eb5332algao// 3504214c4870e9183418014634268ce630eb5332algao// The Subzero Code Generator 4504214c4870e9183418014634268ce630eb5332algao// 58d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// This file is distributed under the University of Illinois Open Source 601f352a7ca57e2bc5ed0f72e5b1d50b6de4469ederic_tian// License. See LICENSE.TXT for details. 78d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// 88d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang//===----------------------------------------------------------------------===// 98d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang/// 108d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang/// \file 118d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang/// This file implements the LinearScan class, which performs the linear-scan 128d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang/// register allocation after liveness analysis has been performed. 138d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang/// 148d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang//===----------------------------------------------------------------------===// 158d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 16504214c4870e9183418014634268ce630eb5332algao#include "IceRegAlloc.h" 178d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 188d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang#include "IceCfg.h" 198d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang#include "IceCfgNode.h" 208d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang#include "IceInst.h" 218d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang#include "IceInstVarIter.h" 228d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang#include "IceOperand.h" 238d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang#include "IceTargetLowering.h" 24fe1e36e550c6ffcd2561903d434683d3939e1942jji 258d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangnamespace Ice { 268d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 278d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangnamespace { 288d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 29fe1e36e550c6ffcd2561903d434683d3939e1942jji// Returns true if Var has any definitions within Item's live range. 308d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// TODO(stichnot): Consider trimming the Definitions list similar to how the 318d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// live ranges are trimmed, since all the overlapsDefs() tests are whether some 328d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// variable's definitions overlap Cur, and trimming is with respect Cur.start. 338d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// Initial tests show no measurable performance difference, so we'll keep the 348d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// code simple for now. 358d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangbool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) { 368d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang constexpr bool UseTrimmed = true; 378d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang VariablesMetadata *VMetadata = Func->getVMetadata(); 388d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) 398d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed)) 408d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang return true; 418d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (const Inst *Def : VMetadata->getLatterDefinitions(Var)) { 428d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Item->getLiveRange().overlapsInst(Def->getNumber(), UseTrimmed)) 438d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang return true; 448d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 458d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang return false; 468d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 478d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 488d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid dumpDisableOverlap(const Cfg *Func, const Variable *Var, 498d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const char *Reason) { 508d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (!BuildDefs::dump()) 518d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang return; 528d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Func->isVerbose(IceV_LinearScan)) { 538d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang VariablesMetadata *VMetadata = Func->getVMetadata(); 548d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Ostream &Str = Func->getContext()->getStrDump(); 558d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Str << "Disabling Overlap due to " << Reason << " " << *Var 568d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang << " LIVE=" << Var->getLiveRange() << " Defs="; 578d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) 588d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Str << FirstDef->getNumber(); 598d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const InstDefList &Defs = VMetadata->getLatterDefinitions(Var); 608d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (size_t i = 0; i < Defs.size(); ++i) { 618d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Str << "," << Defs[i]->getNumber(); 628d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 638d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Str << "\n"; 640c2b5da80e9551286cd02a92d91090290ae2d816qwang } 658d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 668d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 678d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid dumpLiveRange(const Variable *Var, const Cfg *Func) { 689cad030bc14e706d8986ed33f0fa8b28f0828c48yshang if (!BuildDefs::dump()) 699cad030bc14e706d8986ed33f0fa8b28f0828c48yshang return; 709cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Ostream &Str = Func->getContext()->getStrDump(); 719cad030bc14e706d8986ed33f0fa8b28f0828c48yshang char buf[30]; 729cad030bc14e706d8986ed33f0fa8b28f0828c48yshang snprintf(buf, llvm::array_lengthof(buf), "%2d", Var->getRegNumTmp()); 739cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Str << "R=" << buf << " V="; 749cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Var->dump(Func); 759cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Str << " Range=" << Var->getLiveRange(); 769cad030bc14e706d8986ed33f0fa8b28f0828c48yshang} 779cad030bc14e706d8986ed33f0fa8b28f0828c48yshang 789cad030bc14e706d8986ed33f0fa8b28f0828c48yshang} // end of anonymous namespace 799cad030bc14e706d8986ed33f0fa8b28f0828c48yshang 809cad030bc14e706d8986ed33f0fa8b28f0828c48yshangLinearScan::LinearScan(Cfg *Func) 819cad030bc14e706d8986ed33f0fa8b28f0828c48yshang : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()), 829cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {} 839cad030bc14e706d8986ed33f0fa8b28f0828c48yshang 849cad030bc14e706d8986ed33f0fa8b28f0828c48yshang// Prepare for full register allocation of all variables. We depend on liveness 859cad030bc14e706d8986ed33f0fa8b28f0828c48yshang// analysis to have calculated live ranges. 869cad030bc14e706d8986ed33f0fa8b28f0828c48yshangvoid LinearScan::initForGlobal() { 879cad030bc14e706d8986ed33f0fa8b28f0828c48yshang TimerMarker T(TimerStack::TT_initUnhandled, Func); 889cad030bc14e706d8986ed33f0fa8b28f0828c48yshang FindPreference = true; 899cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // For full register allocation, normally we want to enable FindOverlap 909cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // (meaning we look for opportunities for two overlapping live ranges to 919cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // safely share the same register). However, we disable it for phi-lowering 929cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // register allocation since no overlap opportunities should be available and 939cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // it's more expensive to look for opportunities. 949cad030bc14e706d8986ed33f0fa8b28f0828c48yshang FindOverlap = (Kind != RAK_Phi); 959cad030bc14e706d8986ed33f0fa8b28f0828c48yshang const VarList &Vars = Func->getVariables(); 969cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Unhandled.reserve(Vars.size()); 979cad030bc14e706d8986ed33f0fa8b28f0828c48yshang UnhandledPrecolored.reserve(Vars.size()); 989cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // Gather the live ranges of all variables and add them to the Unhandled set. 999cad030bc14e706d8986ed33f0fa8b28f0828c48yshang for (Variable *Var : Vars) { 1009cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // Explicitly don't consider zero-weight variables, which are meant to be 1019cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // spill slots. 1029cad030bc14e706d8986ed33f0fa8b28f0828c48yshang if (Var->mustNotHaveReg()) 1039cad030bc14e706d8986ed33f0fa8b28f0828c48yshang continue; 1049cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // Don't bother if the variable has a null live range, which means it was 1059cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // never referenced. 1069cad030bc14e706d8986ed33f0fa8b28f0828c48yshang if (Var->getLiveRange().isEmpty()) 1079cad030bc14e706d8986ed33f0fa8b28f0828c48yshang continue; 1089cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Var->untrimLiveRange(); 1099cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Unhandled.push_back(Var); 1109cad030bc14e706d8986ed33f0fa8b28f0828c48yshang if (Var->hasReg()) { 1119cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Var->setRegNumTmp(Var->getRegNum()); 1129cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Var->setMustHaveReg(); 1139cad030bc14e706d8986ed33f0fa8b28f0828c48yshang UnhandledPrecolored.push_back(Var); 1149cad030bc14e706d8986ed33f0fa8b28f0828c48yshang } 1159cad030bc14e706d8986ed33f0fa8b28f0828c48yshang } 1169cad030bc14e706d8986ed33f0fa8b28f0828c48yshang 1179cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // Build the (ordered) list of FakeKill instruction numbers. 1189cad030bc14e706d8986ed33f0fa8b28f0828c48yshang Kills.clear(); 1199cad030bc14e706d8986ed33f0fa8b28f0828c48yshang // Phi lowering should not be creating new call instructions, so there should 120130e25699a6d060c966978e345c6c8eeed85d065yshang // be no infinite-weight not-yet-colored live ranges that span a call 121130e25699a6d060c966978e345c6c8eeed85d065yshang // instruction, hence no need to construct the Kills list. 122130e25699a6d060c966978e345c6c8eeed85d065yshang if (Kind == RAK_Phi) 123130e25699a6d060c966978e345c6c8eeed85d065yshang return; 1248d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (CfgNode *Node : Func->getNodes()) { 1258d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (Inst &I : Node->getInsts()) { 1268d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (auto Kill = llvm::dyn_cast<InstFakeKill>(&I)) { 1278d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted()) 1288d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Kills.push_back(I.getNumber()); 1298d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 1308d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 1318d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 132130e25699a6d060c966978e345c6c8eeed85d065yshang} 1338d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 1348d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// Validate the integrity of the live ranges. If there are any errors, it 1358d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// prints details and returns false. On success, it returns true. 1368d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangbool LinearScan::livenessValidateIntervals( 1378d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const DefUseErrorList &DefsWithoutUses, 1388d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const DefUseErrorList &UsesBeforeDefs, 1398d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const CfgVector<InstNumberT> &LRBegin, 140130e25699a6d060c966978e345c6c8eeed85d065yshang const CfgVector<InstNumberT> &LREnd) { 141130e25699a6d060c966978e345c6c8eeed85d065yshang if (DefsWithoutUses.empty() && UsesBeforeDefs.empty()) 142130e25699a6d060c966978e345c6c8eeed85d065yshang return true; 143130e25699a6d060c966978e345c6c8eeed85d065yshang 144130e25699a6d060c966978e345c6c8eeed85d065yshang if (!BuildDefs::dump()) 1458d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang return false; 1468d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 147130e25699a6d060c966978e345c6c8eeed85d065yshang const VarList &Vars = Func->getVariables(); 148130e25699a6d060c966978e345c6c8eeed85d065yshang OstreamLocker L(Ctx); 149130e25699a6d060c966978e345c6c8eeed85d065yshang Ostream &Str = Ctx->getStrDump(); 1508d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (SizeT VarNum : DefsWithoutUses) { 1518d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Variable *Var = Vars[VarNum]; 1528d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Str << "LR def without use, instruction " << LRBegin[VarNum] 1538d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang << ", variable " << Var->getName(Func) << "\n"; 1548d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 1558d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (SizeT VarNum : UsesBeforeDefs) { 156130e25699a6d060c966978e345c6c8eeed85d065yshang Variable *Var = Vars[VarNum]; 157130e25699a6d060c966978e345c6c8eeed85d065yshang Str << "LR use before def, instruction " << LREnd[VarNum] << ", variable " 158130e25699a6d060c966978e345c6c8eeed85d065yshang << Var->getName(Func) << "\n"; 159130e25699a6d060c966978e345c6c8eeed85d065yshang } 160130e25699a6d060c966978e345c6c8eeed85d065yshang return false; 161130e25699a6d060c966978e345c6c8eeed85d065yshang} 162130e25699a6d060c966978e345c6c8eeed85d065yshang 163130e25699a6d060c966978e345c6c8eeed85d065yshang// Prepare for very simple register allocation of only infinite-weight 164130e25699a6d060c966978e345c6c8eeed85d065yshang// Variables while respecting pre-colored Variables. Some properties we take 165130e25699a6d060c966978e345c6c8eeed85d065yshang// advantage of: 166130e25699a6d060c966978e345c6c8eeed85d065yshang// 167130e25699a6d060c966978e345c6c8eeed85d065yshang// * Live ranges of interest consist of a single segment. 168130e25699a6d060c966978e345c6c8eeed85d065yshang// 169130e25699a6d060c966978e345c6c8eeed85d065yshang// * Live ranges of interest never span a call instruction. 170130e25699a6d060c966978e345c6c8eeed85d065yshang// 171130e25699a6d060c966978e345c6c8eeed85d065yshang// * Phi instructions are not considered because either phis have already been 172130e25699a6d060c966978e345c6c8eeed85d065yshang// lowered, or they don't contain any pre-colored or infinite-weight 173130e25699a6d060c966978e345c6c8eeed85d065yshang// Variables. 174130e25699a6d060c966978e345c6c8eeed85d065yshang// 175130e25699a6d060c966978e345c6c8eeed85d065yshang// * We don't need to renumber instructions before computing live ranges 176130e25699a6d060c966978e345c6c8eeed85d065yshang// because all the high-level ICE instructions are deleted prior to lowering, 177130e25699a6d060c966978e345c6c8eeed85d065yshang// and the low-level instructions are added in monotonically increasing order. 178130e25699a6d060c966978e345c6c8eeed85d065yshang// 179130e25699a6d060c966978e345c6c8eeed85d065yshang// * There are no opportunities for register preference or allowing overlap. 180130e25699a6d060c966978e345c6c8eeed85d065yshang// 181130e25699a6d060c966978e345c6c8eeed85d065yshang// Some properties we aren't (yet) taking advantage of: 182130e25699a6d060c966978e345c6c8eeed85d065yshang// 183130e25699a6d060c966978e345c6c8eeed85d065yshang// * Because live ranges are a single segment, the Inactive set will always be 184130e25699a6d060c966978e345c6c8eeed85d065yshang// empty, and the live range trimming operation is unnecessary. 185130e25699a6d060c966978e345c6c8eeed85d065yshang// 186130e25699a6d060c966978e345c6c8eeed85d065yshang// * Calculating overlap of single-segment live ranges could be optimized a 187130e25699a6d060c966978e345c6c8eeed85d065yshang// bit. 188130e25699a6d060c966978e345c6c8eeed85d065yshangvoid LinearScan::initForInfOnly() { 189130e25699a6d060c966978e345c6c8eeed85d065yshang TimerMarker T(TimerStack::TT_initUnhandled, Func); 190130e25699a6d060c966978e345c6c8eeed85d065yshang FindPreference = false; 191130e25699a6d060c966978e345c6c8eeed85d065yshang FindOverlap = false; 192130e25699a6d060c966978e345c6c8eeed85d065yshang SizeT NumVars = 0; 193130e25699a6d060c966978e345c6c8eeed85d065yshang const VarList &Vars = Func->getVariables(); 194130e25699a6d060c966978e345c6c8eeed85d065yshang 195130e25699a6d060c966978e345c6c8eeed85d065yshang // Iterate across all instructions and record the begin and end of the live 196130e25699a6d060c966978e345c6c8eeed85d065yshang // range for each variable that is pre-colored or infinite weight. 197130e25699a6d060c966978e345c6c8eeed85d065yshang CfgVector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel); 198130e25699a6d060c966978e345c6c8eeed85d065yshang CfgVector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel); 199130e25699a6d060c966978e345c6c8eeed85d065yshang DefUseErrorList DefsWithoutUses, UsesBeforeDefs; 200130e25699a6d060c966978e345c6c8eeed85d065yshang for (CfgNode *Node : Func->getNodes()) { 201130e25699a6d060c966978e345c6c8eeed85d065yshang for (Inst &Inst : Node->getInsts()) { 202130e25699a6d060c966978e345c6c8eeed85d065yshang if (Inst.isDeleted()) 203130e25699a6d060c966978e345c6c8eeed85d065yshang continue; 204130e25699a6d060c966978e345c6c8eeed85d065yshang FOREACH_VAR_IN_INST(Var, Inst) { 205130e25699a6d060c966978e345c6c8eeed85d065yshang if (Var->getIgnoreLiveness()) 206130e25699a6d060c966978e345c6c8eeed85d065yshang continue; 207130e25699a6d060c966978e345c6c8eeed85d065yshang if (Var->hasReg() || Var->mustHaveReg()) { 208130e25699a6d060c966978e345c6c8eeed85d065yshang SizeT VarNum = Var->getIndex(); 209130e25699a6d060c966978e345c6c8eeed85d065yshang LREnd[VarNum] = Inst.getNumber(); 210130e25699a6d060c966978e345c6c8eeed85d065yshang if (!Var->getIsArg() && LRBegin[VarNum] == Inst::NumberSentinel) 211130e25699a6d060c966978e345c6c8eeed85d065yshang UsesBeforeDefs.push_back(VarNum); 212130e25699a6d060c966978e345c6c8eeed85d065yshang } 213130e25699a6d060c966978e345c6c8eeed85d065yshang } 214130e25699a6d060c966978e345c6c8eeed85d065yshang if (const Variable *Var = Inst.getDest()) { 2158d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (!Var->getIgnoreLiveness() && 2168d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang (Var->hasReg() || Var->mustHaveReg())) { 217130e25699a6d060c966978e345c6c8eeed85d065yshang if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) { 218130e25699a6d060c966978e345c6c8eeed85d065yshang LRBegin[Var->getIndex()] = Inst.getNumber(); 219130e25699a6d060c966978e345c6c8eeed85d065yshang ++NumVars; 220130e25699a6d060c966978e345c6c8eeed85d065yshang } 221130e25699a6d060c966978e345c6c8eeed85d065yshang } 222130e25699a6d060c966978e345c6c8eeed85d065yshang } 223130e25699a6d060c966978e345c6c8eeed85d065yshang } 224130e25699a6d060c966978e345c6c8eeed85d065yshang } 225130e25699a6d060c966978e345c6c8eeed85d065yshang 226130e25699a6d060c966978e345c6c8eeed85d065yshang Unhandled.reserve(NumVars); 227130e25699a6d060c966978e345c6c8eeed85d065yshang UnhandledPrecolored.reserve(NumVars); 228130e25699a6d060c966978e345c6c8eeed85d065yshang for (SizeT i = 0; i < Vars.size(); ++i) { 229130e25699a6d060c966978e345c6c8eeed85d065yshang Variable *Var = Vars[i]; 230130e25699a6d060c966978e345c6c8eeed85d065yshang if (LRBegin[i] != Inst::NumberSentinel) { 231130e25699a6d060c966978e345c6c8eeed85d065yshang if (LREnd[i] == Inst::NumberSentinel) { 232130e25699a6d060c966978e345c6c8eeed85d065yshang DefsWithoutUses.push_back(i); 233130e25699a6d060c966978e345c6c8eeed85d065yshang continue; 234130e25699a6d060c966978e345c6c8eeed85d065yshang } 235130e25699a6d060c966978e345c6c8eeed85d065yshang Unhandled.push_back(Var); 236130e25699a6d060c966978e345c6c8eeed85d065yshang Var->resetLiveRange(); 237130e25699a6d060c966978e345c6c8eeed85d065yshang Var->addLiveRange(LRBegin[i], LREnd[i]); 238130e25699a6d060c966978e345c6c8eeed85d065yshang Var->untrimLiveRange(); 239130e25699a6d060c966978e345c6c8eeed85d065yshang if (Var->hasReg()) { 240130e25699a6d060c966978e345c6c8eeed85d065yshang Var->setRegNumTmp(Var->getRegNum()); 241130e25699a6d060c966978e345c6c8eeed85d065yshang Var->setMustHaveReg(); 242130e25699a6d060c966978e345c6c8eeed85d065yshang UnhandledPrecolored.push_back(Var); 243130e25699a6d060c966978e345c6c8eeed85d065yshang } 244130e25699a6d060c966978e345c6c8eeed85d065yshang --NumVars; 245130e25699a6d060c966978e345c6c8eeed85d065yshang } 246130e25699a6d060c966978e345c6c8eeed85d065yshang } 247130e25699a6d060c966978e345c6c8eeed85d065yshang 248130e25699a6d060c966978e345c6c8eeed85d065yshang if (!livenessValidateIntervals(DefsWithoutUses, UsesBeforeDefs, LRBegin, 249130e25699a6d060c966978e345c6c8eeed85d065yshang LREnd)) { 250130e25699a6d060c966978e345c6c8eeed85d065yshang llvm::report_fatal_error("initForInfOnly: Liveness error"); 251130e25699a6d060c966978e345c6c8eeed85d065yshang return; 252130e25699a6d060c966978e345c6c8eeed85d065yshang } 253130e25699a6d060c966978e345c6c8eeed85d065yshang 254130e25699a6d060c966978e345c6c8eeed85d065yshang if (!DefsWithoutUses.empty() || !UsesBeforeDefs.empty()) { 255130e25699a6d060c966978e345c6c8eeed85d065yshang if (BuildDefs::dump()) { 256130e25699a6d060c966978e345c6c8eeed85d065yshang OstreamLocker L(Ctx); 257130e25699a6d060c966978e345c6c8eeed85d065yshang Ostream &Str = Ctx->getStrDump(); 258130e25699a6d060c966978e345c6c8eeed85d065yshang for (SizeT VarNum : DefsWithoutUses) { 259130e25699a6d060c966978e345c6c8eeed85d065yshang Variable *Var = Vars[VarNum]; 260130e25699a6d060c966978e345c6c8eeed85d065yshang Str << "LR def without use, instruction " << LRBegin[VarNum] 261130e25699a6d060c966978e345c6c8eeed85d065yshang << ", variable " << Var->getName(Func) << "\n"; 262130e25699a6d060c966978e345c6c8eeed85d065yshang } 263130e25699a6d060c966978e345c6c8eeed85d065yshang for (SizeT VarNum : UsesBeforeDefs) { 264130e25699a6d060c966978e345c6c8eeed85d065yshang Variable *Var = Vars[VarNum]; 265130e25699a6d060c966978e345c6c8eeed85d065yshang Str << "LR use before def, instruction " << LREnd[VarNum] 266130e25699a6d060c966978e345c6c8eeed85d065yshang << ", variable " << Var->getName(Func) << "\n"; 267130e25699a6d060c966978e345c6c8eeed85d065yshang } 268130e25699a6d060c966978e345c6c8eeed85d065yshang } 269130e25699a6d060c966978e345c6c8eeed85d065yshang llvm::report_fatal_error("initForInfOnly: Liveness error"); 270130e25699a6d060c966978e345c6c8eeed85d065yshang } 271130e25699a6d060c966978e345c6c8eeed85d065yshang // This isn't actually a fatal condition, but it would be nice to know if we 272130e25699a6d060c966978e345c6c8eeed85d065yshang // somehow pre-calculated Unhandled's size wrong. 273130e25699a6d060c966978e345c6c8eeed85d065yshang assert(NumVars == 0); 274130e25699a6d060c966978e345c6c8eeed85d065yshang 2758d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Don't build up the list of Kills because we know that no infinite-weight 2768d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Variable has a live range spanning a call. 2778d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Kills.clear(); 278130e25699a6d060c966978e345c6c8eeed85d065yshang} 279130e25699a6d060c966978e345c6c8eeed85d065yshang 2808d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::init(RegAllocKind Kind) { 2818d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang this->Kind = Kind; 2828d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Unhandled.clear(); 283130e25699a6d060c966978e345c6c8eeed85d065yshang UnhandledPrecolored.clear(); 284130e25699a6d060c966978e345c6c8eeed85d065yshang Handled.clear(); 285130e25699a6d060c966978e345c6c8eeed85d065yshang Inactive.clear(); 286130e25699a6d060c966978e345c6c8eeed85d065yshang Active.clear(); 2878d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 2888d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang SizeT NumRegs = Target->getNumRegisters(); 289130e25699a6d060c966978e345c6c8eeed85d065yshang RegAliases.resize(NumRegs); 290130e25699a6d060c966978e345c6c8eeed85d065yshang for (SizeT Reg = 0; Reg < NumRegs; ++Reg) { 291130e25699a6d060c966978e345c6c8eeed85d065yshang RegAliases[Reg] = &Target->getAliasesForRegister(Reg); 292130e25699a6d060c966978e345c6c8eeed85d065yshang } 293130e25699a6d060c966978e345c6c8eeed85d065yshang 294130e25699a6d060c966978e345c6c8eeed85d065yshang switch (Kind) { 295130e25699a6d060c966978e345c6c8eeed85d065yshang case RAK_Unknown: 296130e25699a6d060c966978e345c6c8eeed85d065yshang llvm::report_fatal_error("Invalid RAK_Unknown"); 2978d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang break; 2988d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang case RAK_Global: 299130e25699a6d060c966978e345c6c8eeed85d065yshang case RAK_Phi: 3008d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang initForGlobal(); 3018d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang break; 3028d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang case RAK_InfOnly: 3038d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang initForInfOnly(); 3048d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang break; 3058d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 3068d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 3078d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang auto CompareRanges = [](const Variable *L, const Variable *R) { 3088d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang InstNumberT Lstart = L->getLiveRange().getStart(); 3098d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang InstNumberT Rstart = R->getLiveRange().getStart(); 3108d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Lstart == Rstart) 3118d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang return L->getIndex() < R->getIndex(); 3128d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang return Lstart < Rstart; 3138d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang }; 3148d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Do a reverse sort so that erasing elements (from the end) is fast. 3158d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges); 3168d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(), 3178d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang CompareRanges); 3188d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 3198d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Handled.reserve(Unhandled.size()); 3208d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Inactive.reserve(Unhandled.size()); 3218d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Active.reserve(Unhandled.size()); 3228d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 3238d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 3248d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// This is called when Cur must be allocated a register but no registers are 3258d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// available across Cur's live range. To handle this, we find a register that 3268d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// is not explicitly used during Cur's live range, spill that register to a 3278d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// stack location right before Cur's live range begins, and fill (reload) the 3288d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// register from the stack location right after Cur's live range ends. 3298d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::addSpillFill(IterationState &Iter) { 3308d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Identify the actual instructions that begin and end Iter.Cur's live range. 3318d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Iterate through Iter.Cur's node's instruction list until we find the actual 3328d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // instructions with instruction numbers corresponding to Iter.Cur's recorded 3338d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // live range endpoints. This sounds inefficient but shouldn't be a problem 3348d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // in practice because: 3358d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // (1) This function is almost never called in practice. 3368d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // (2) Since this register over-subscription problem happens only for 3378d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // phi-lowered instructions, the number of instructions in the node is 3388d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // proportional to the number of phi instructions in the original node, 3398d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // which is never very large in practice. 3408d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // (3) We still have to iterate through all instructions of Iter.Cur's live 3418d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // range to find all explicitly used registers (though the live range is 3428d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // usually only 2-3 instructions), so the main cost that could be avoided 3438d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // would be finding the instruction that begin's Iter.Cur's live range. 3448d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(!Iter.Cur->getLiveRange().isEmpty()); 3458d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang InstNumberT Start = Iter.Cur->getLiveRange().getStart(); 3468d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang InstNumberT End = Iter.Cur->getLiveRange().getEnd(); 3478d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur); 3488d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(Node); 3498d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang InstList &Insts = Node->getInsts(); 3508d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang InstList::iterator SpillPoint = Insts.end(); 3518d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang InstList::iterator FillPoint = Insts.end(); 3528d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Stop searching after we have found both the SpillPoint and the FillPoint. 3538d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (auto I = Insts.begin(), E = Insts.end(); 3548d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang I != E && (SpillPoint == E || FillPoint == E); ++I) { 3558d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (I->getNumber() == Start) 3568d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang SpillPoint = I; 3578d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (I->getNumber() == End) 3588d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang FillPoint = I; 3598d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (SpillPoint != E) { 3608d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Remove from RegMask any physical registers referenced during Cur's 3618d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // live range. Start looking after SpillPoint gets set, i.e. once Cur's 3628d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // live range begins. 3638d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang FOREACH_VAR_IN_INST(Var, *I) { 3648d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (!Var->hasRegTmp()) 3658d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang continue; 3668d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = *RegAliases[Var->getRegNumTmp()]; 3678d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 3688d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 3698d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.RegMask[RegAlias] = false; 3708d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 371130e25699a6d060c966978e345c6c8eeed85d065yshang } 372130e25699a6d060c966978e345c6c8eeed85d065yshang } 3738d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 3748d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(SpillPoint != Insts.end()); 3758d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(FillPoint != Insts.end()); 3768d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang ++FillPoint; 3778d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // TODO(stichnot): Randomize instead of find_first(). 3788d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang int32_t RegNum = Iter.RegMask.find_first(); 3798d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(RegNum != -1); 3808d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Cur->setRegNumTmp(RegNum); 3818d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType()); 3828d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that 3838d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // SpillLoc is correctly identified as !isMultiBlock(), reducing stack frame 3848d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // size. 3858d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType()); 3868d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Add "reg=FakeDef;spill=reg" before SpillPoint 387130e25699a6d060c966978e345c6c8eeed85d065yshang Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg)); 388130e25699a6d060c966978e345c6c8eeed85d065yshang Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg)); 389130e25699a6d060c966978e345c6c8eeed85d065yshang // add "reg=spill;FakeUse(reg)" before FillPoint 3908d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc)); 3918d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg)); 3928d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 3938d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 3948d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) { 3958d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (SizeT I = Active.size(); I > 0; --I) { 3968d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const SizeT Index = I - 1; 3978d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Variable *Item = Active[Index]; 3988d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Item->trimLiveRange(Cur->getLiveRange().getStart()); 3998d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang bool Moved = false; 4008d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Item->rangeEndsBefore(Cur)) { 4018d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Move Item from Active to Handled list. 402284c8400e8c5518b688c7bca66cc73c55532ac39qwang dumpLiveRangeTrace("Expiring ", Item); 4038d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang moveItem(Active, Index, Handled); 4048d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Moved = true; 4058d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } else if (!Item->rangeOverlapsStart(Cur)) { 4068d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Move Item from Active to Inactive list. 4078d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpLiveRangeTrace("Inactivating ", Item); 4088d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang moveItem(Active, Index, Inactive); 4098d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Moved = true; 4108d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 4118d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Moved) { 4128d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Decrement Item from RegUses[]. 4138d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(Item->hasRegTmp()); 4148d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()]; 4158d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 4168d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 4178d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang --RegUses[RegAlias]; 4188d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(RegUses[RegAlias] >= 0); 4198d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 4208d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 4218d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 4228d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 4238d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 4248d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) { 4258d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (SizeT I = Inactive.size(); I > 0; --I) { 4268d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const SizeT Index = I - 1; 4278d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Variable *Item = Inactive[Index]; 4288d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Item->trimLiveRange(Cur->getLiveRange().getStart()); 4298d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Item->rangeEndsBefore(Cur)) { 4308d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Move Item from Inactive to Handled list. 4318d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpLiveRangeTrace("Expiring ", Item); 4328d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang moveItem(Inactive, Index, Handled); 4338d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } else if (Item->rangeOverlapsStart(Cur)) { 4348d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Move Item from Inactive to Active list. 4358d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpLiveRangeTrace("Reactivating ", Item); 4368d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang moveItem(Inactive, Index, Active); 4378d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Increment Item in RegUses[]. 4388d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(Item->hasRegTmp()); 4398d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()]; 4408d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 4418d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 4428d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(RegUses[RegAlias] >= 0); 4438d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang ++RegUses[RegAlias]; 4448d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 4458d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 4468d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 4478d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 4488d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 4498d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// Infer register preference and allowable overlap. Only form a preference when 4508d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// the current Variable has an unambiguous "first" definition. The preference 4518d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// is some source Variable of the defining instruction that either is assigned 4528d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// a register that is currently free, or that is assigned a register that is 4538d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// not free but overlap is allowed. Overlap is allowed when the Variable under 4548d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// consideration is single-definition, and its definition is a simple 4558d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// assignment - i.e., the register gets copied/aliased but is never modified. 4568d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// Furthermore, overlap is only allowed when preferred Variable definition 4578d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// instructions do not appear within the current Variable's live range. 4588d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::findRegisterPreference(IterationState &Iter) { 4598d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Prefer = nullptr; 4608d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.PreferReg = Variable::NoRegister; 4618d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.AllowOverlap = false; 4628d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 4638d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (FindPreference) { 4648d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang VariablesMetadata *VMetadata = Func->getVMetadata(); 4658d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (const Inst *DefInst = 4668d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang VMetadata->getFirstDefinitionSingleBlock(Iter.Cur)) { 4678d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(DefInst->getDest() == Iter.Cur); 4688d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang bool IsAssign = DefInst->isSimpleAssign(); 4698d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang bool IsSingleDef = !VMetadata->isMultiDef(Iter.Cur); 4708d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (SizeT i = 0; i < DefInst->getSrcSize(); ++i) { 4718d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // TODO(stichnot): Iterate through the actual Variables of the 4728d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // instruction, not just the source operands. This could capture Load 4738d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // instructions, including address mode optimization, for Prefer (but 4748d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // not for AllowOverlap). 4758d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) { 4768d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang int32_t SrcReg = SrcVar->getRegNumTmp(); 4778d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Only consider source variables that have (so far) been assigned a 4788d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // register. That register must be one in the RegMask set, e.g. don't 4798d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // try to prefer the stack pointer as a result of the stacksave 4808d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // intrinsic. 4818d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) { 4828d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (FindOverlap && !Iter.Free[SrcReg]) { 4838d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Don't bother trying to enable AllowOverlap if the register is 4848d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // already free. 4858d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.AllowOverlap = IsSingleDef && IsAssign && 4868d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang !overlapsDefs(Func, Iter.Cur, SrcVar); 4878d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 4888d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Iter.AllowOverlap || Iter.Free[SrcReg]) { 4898d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Prefer = SrcVar; 4908d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.PreferReg = SrcReg; 4918d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 4928d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 4939cad030bc14e706d8986ed33f0fa8b28f0828c48yshang } 4949cad030bc14e706d8986ed33f0fa8b28f0828c48yshang } 4958d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Verbose && Iter.Prefer) { 4968d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Ostream &Str = Ctx->getStrDump(); 4978d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Str << "Initial Iter.Prefer="; 4988d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Prefer->dump(Func); 4998d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Str << " R=" << Iter.PreferReg 5008d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang << " LIVE=" << Iter.Prefer->getLiveRange() 5018d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang << " Overlap=" << Iter.AllowOverlap << "\n"; 5028d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5038d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5048d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5058d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 5068d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 5078d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// Remove registers from the Free[] list where an Inactive range overlaps with 5088d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// the current range. 5098d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) { 5108d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (const Variable *Item : Inactive) { 5118d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (!Item->rangeOverlaps(Iter.Cur)) 5128d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang continue; 5138d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()]; 5148d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 5158d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 5168d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Don't assert(Free[RegNum]) because in theory (though probably never in 5178d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // practice) there could be two inactive variables that were marked with 5188d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // AllowOverlap. 5198d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Free[RegAlias] = false; 5208d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Disable AllowOverlap if an Inactive variable, which is not Prefer, 5218d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // shares Prefer's register, and has a definition within Cur's live 5228d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // range. 5238d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Iter.AllowOverlap && Item != Iter.Prefer && 5248d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) { 5258d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.AllowOverlap = false; 5268d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpDisableOverlap(Func, Item, "Inactive"); 5278d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5288d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5298d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5308d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 5318d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 5328d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// Remove registers from the Free[] list where an Unhandled pre-colored range 5338d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// overlaps with the current range, and set those registers to infinite weight 5348d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is 5358d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// an early exit check that turns a guaranteed O(N^2) algorithm into expected 5368d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang// linear complexity. 5378d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) { 5388d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (Variable *Item : reverse_range(UnhandledPrecolored)) { 5398d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(Item->hasReg()); 5408d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Iter.Cur->rangeEndsBefore(Item)) 5418d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang break; 5428d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Item->rangeOverlaps(Iter.Cur)) { 5438d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = 5448d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp() 5458d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 5468d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 5478d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Weights[RegAlias].setWeight(RegWeight::Inf); 5488d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Free[RegAlias] = false; 5498d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.PrecoloredUnhandledMask[RegAlias] = true; 5508d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Disable Iter.AllowOverlap if the preferred register is one of these 5518d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // pre-colored unhandled overlapping ranges. 5528d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Iter.AllowOverlap && RegAlias == Iter.PreferReg) { 5538d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.AllowOverlap = false; 5548d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpDisableOverlap(Func, Item, "PrecoloredUnhandled"); 5558d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5568d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5578d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5588d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5598d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 5608d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 5618d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::allocatePrecoloredRegister(Variable *Cur) { 5628d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang int32_t RegNum = Cur->getRegNum(); 5638d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // RegNumTmp should have already been set above. 5648d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(Cur->getRegNumTmp() == RegNum); 5658d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpLiveRangeTrace("Precoloring ", Cur); 5668d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Active.push_back(Cur); 5678d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = *RegAliases[RegNum]; 5688d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 5698d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 5708d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(RegUses[RegAlias] >= 0); 5718d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang ++RegUses[RegAlias]; 5728d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5738d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(!UnhandledPrecolored.empty()); 5748d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(UnhandledPrecolored.back() == Cur); 5758d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang UnhandledPrecolored.pop_back(); 5768d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 5778d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 5788d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::allocatePreferredRegister(IterationState &Iter) { 5798d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Cur->setRegNumTmp(Iter.PreferReg); 5808d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpLiveRangeTrace("Preferring ", Iter.Cur); 581284c8400e8c5518b688c7bca66cc73c55532ac39qwang const llvm::SmallBitVector &Aliases = *RegAliases[Iter.PreferReg]; 5828d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 5838d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 58401f352a7ca57e2bc5ed0f72e5b1d50b6de4469ederic_tian assert(RegUses[RegAlias] >= 0); 5858d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang ++RegUses[RegAlias]; 5868d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 5878d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Active.push_back(Iter.Cur); 5888d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 5898d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 5908d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::allocateFreeRegister(IterationState &Iter) { 5918d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang int32_t RegNum = Iter.Free.find_first(); 5928d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Cur->setRegNumTmp(RegNum); 5938d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpLiveRangeTrace("Allocating ", Iter.Cur); 5948d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = *RegAliases[RegNum]; 5958d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 5968d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 597130e25699a6d060c966978e345c6c8eeed85d065yshang assert(RegUses[RegAlias] >= 0); 5988d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang ++RegUses[RegAlias]; 59939aea48d30311de191a1e6c2c66b2307c29f385fgtian } 60039aea48d30311de191a1e6c2c66b2307c29f385fgtian Active.push_back(Iter.Cur); 60139aea48d30311de191a1e6c2c66b2307c29f385fgtian} 60239aea48d30311de191a1e6c2c66b2307c29f385fgtian 60339aea48d30311de191a1e6c2c66b2307c29f385fgtianvoid LinearScan::handleNoFreeRegisters(IterationState &Iter) { 60439aea48d30311de191a1e6c2c66b2307c29f385fgtian // Check Active ranges. 60539aea48d30311de191a1e6c2c66b2307c29f385fgtian for (const Variable *Item : Active) { 606130e25699a6d060c966978e345c6c8eeed85d065yshang assert(Item->rangeOverlaps(Iter.Cur)); 6078d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(Item->hasRegTmp()); 6088d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()]; 6098d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // We add the Item's weight to each alias/subregister to represent that, 6108d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // should we decide to pick any of them, then we would incur that many 6118d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // memory accesses. 6128d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegWeight W = Item->getWeight(Func); 6138d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 6148d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 6158d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Weights[RegAlias].addWeight(W); 6168d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6178d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6188d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Same as above, but check Inactive ranges instead of Active. 6198d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (const Variable *Item : Inactive) { 6208d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (!Item->rangeOverlaps(Iter.Cur)) 6218d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang continue; 6228d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(Item->hasRegTmp()); 6238d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()]; 6248d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegWeight W = Item->getWeight(Func); 6258d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 6268d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 6278d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Weights[RegAlias].addWeight(W); 6288d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6298d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6308d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 6318d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // All the weights are now calculated. Find the register with smallest 6328d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // weight. 6338d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang int32_t MinWeightIndex = Iter.RegMask.find_first(); 6348d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // MinWeightIndex must be valid because of the initial RegMask.any() test. 6358d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(MinWeightIndex >= 0); 6368d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (SizeT i = MinWeightIndex + 1; i < Iter.Weights.size(); ++i) { 6378d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Iter.RegMask[i] && Iter.Weights[i] < Iter.Weights[MinWeightIndex]) 6388d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang MinWeightIndex = i; 6398d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6408d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 6418d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) { 6428d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Cur doesn't have priority over any other live ranges, so don't allocate 6438d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // any register to it, and move it to the Handled state. 6448d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Handled.push_back(Iter.Cur); 6458d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Iter.Cur->mustHaveReg()) { 6468d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Kind == RAK_Phi) 6478d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang addSpillFill(Iter); 6488d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang else 6498d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Func->setError("Unable to find a physical register for an " 6508d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang "infinite-weight live range"); 6518d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6528d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } else { 6538d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Evict all live ranges in Active that register number MinWeightIndex is 6548d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // assigned to. 6558d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (SizeT I = Active.size(); I > 0; --I) { 6568d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const SizeT Index = I - 1; 6578d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Variable *Item = Active[Index]; 658284c8400e8c5518b688c7bca66cc73c55532ac39qwang if (Item->getRegNumTmp() == MinWeightIndex) { 6598d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpLiveRangeTrace("Evicting ", Item); 6608d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = *RegAliases[MinWeightIndex]; 66101f352a7ca57e2bc5ed0f72e5b1d50b6de4469ederic_tian for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 6628d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 6638d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang --RegUses[RegAlias]; 6648d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(RegUses[RegAlias] >= 0); 6658d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6668d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Item->setRegNumTmp(Variable::NoRegister); 6678d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang moveItem(Active, Index, Handled); 6688d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6698d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6708d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Do the same for Inactive. 6718d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (SizeT I = Inactive.size(); I > 0; --I) { 6728d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const SizeT Index = I - 1; 6738d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Variable *Item = Inactive[Index]; 6748d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Note: The Item->rangeOverlaps(Cur) clause is not part of the 6758d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // description of AssignMemLoc() in the original paper. But there doesn't 6768d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // seem to be any need to evict an inactive live range that doesn't 6778d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // overlap with the live range currently being considered. It's 6788d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // especially bad if we would end up evicting an infinite-weight but 6798d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // currently-inactive live range. The most common situation for this 680130e25699a6d060c966978e345c6c8eeed85d065yshang // would be a scratch register kill set for call instructions. 6819cad030bc14e706d8986ed33f0fa8b28f0828c48yshang if (Item->getRegNumTmp() == MinWeightIndex && 682130e25699a6d060c966978e345c6c8eeed85d065yshang Item->rangeOverlaps(Iter.Cur)) { 6838d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpLiveRangeTrace("Evicting ", Item); 684130e25699a6d060c966978e345c6c8eeed85d065yshang Item->setRegNumTmp(Variable::NoRegister); 6858d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang moveItem(Inactive, Index, Handled); 6868d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6878d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6888d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Assign the register to Cur. 6898d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Iter.Cur->setRegNumTmp(MinWeightIndex); 6908d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &Aliases = *RegAliases[MinWeightIndex]; 6918d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0; 6928d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang RegAlias = Aliases.find_next(RegAlias)) { 6938d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang assert(RegUses[RegAlias] >= 0); 6948d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang ++RegUses[RegAlias]; 6958d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6968d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang Active.push_back(Iter.Cur); 6978d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang dumpLiveRangeTrace("Allocating ", Iter.Cur); 6988d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang } 6998d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang} 7008d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang 7018d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshangvoid LinearScan::assignFinalRegisters( 7028d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &RegMaskFull, 7038d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const llvm::SmallBitVector &PreDefinedRegisters, bool Randomized) { 7048d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang const size_t NumRegisters = RegMaskFull.size(); 7058d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters); 7068d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang if (Randomized) { 7078d3a5c82a408c6a7c4c8aa96d0be6c99b8070ac5yshang // Create a random number generator for regalloc randomization. Merge 708 // function's sequence and Kind value as the Salt. Because regAlloc() is 709 // called twice under O2, the second time with RAK_Phi, we check Kind == 710 // RAK_Phi to determine the lowest-order bit to make sure the Salt is 711 // different. 712 uint64_t Salt = 713 (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u); 714 Target->makeRandomRegisterPermutation( 715 Permutation, PreDefinedRegisters | ~RegMaskFull, Salt); 716 } 717 718 // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof) 719 // for each Variable. 720 for (Variable *Item : Handled) { 721 int32_t RegNum = Item->getRegNumTmp(); 722 int32_t AssignedRegNum = RegNum; 723 724 if (Randomized && Item->hasRegTmp() && !Item->hasReg()) { 725 AssignedRegNum = Permutation[RegNum]; 726 } 727 if (Verbose) { 728 Ostream &Str = Ctx->getStrDump(); 729 if (!Item->hasRegTmp()) { 730 Str << "Not assigning "; 731 Item->dump(Func); 732 Str << "\n"; 733 } else { 734 Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning " 735 : "Assigning ") 736 << Target->getRegName(AssignedRegNum, IceType_i32) << "(r" 737 << AssignedRegNum << ") to "; 738 Item->dump(Func); 739 Str << "\n"; 740 } 741 } 742 Item->setRegNum(AssignedRegNum); 743 } 744} 745 746// Implements the linear-scan algorithm. Based on "Linear Scan Register 747// Allocation in the Context of SSA Form and Register Constraints" by Hanspeter 748// Mössenböck and Michael Pfeiffer, 749// ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. This implementation is 750// modified to take affinity into account and allow two interfering variables 751// to share the same register in certain cases. 752// 753// Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results 754// are assigned to Variable::RegNum for each Variable. 755void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull, 756 bool Randomized) { 757 TimerMarker T(TimerStack::TT_linearScan, Func); 758 assert(RegMaskFull.any()); // Sanity check 759 if (Verbose) 760 Ctx->lockStr(); 761 Func->resetCurrentNode(); 762 const size_t NumRegisters = RegMaskFull.size(); 763 llvm::SmallBitVector PreDefinedRegisters(NumRegisters); 764 if (Randomized) { 765 for (Variable *Var : UnhandledPrecolored) { 766 PreDefinedRegisters[Var->getRegNum()] = true; 767 } 768 } 769 770 // Build a LiveRange representing the Kills list. 771 LiveRange KillsRange(Kills); 772 KillsRange.untrim(); 773 774 // Reset the register use count 775 RegUses.resize(NumRegisters); 776 std::fill(RegUses.begin(), RegUses.end(), 0); 777 778 // Unhandled is already set to all ranges in increasing order of start 779 // points. 780 assert(Active.empty()); 781 assert(Inactive.empty()); 782 assert(Handled.empty()); 783 const TargetLowering::RegSetMask RegsInclude = 784 TargetLowering::RegSet_CallerSave; 785 const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None; 786 const llvm::SmallBitVector KillsMask = 787 Target->getRegisterSet(RegsInclude, RegsExclude); 788 789 // Allocate memory once outside the loop 790 IterationState Iter; 791 Iter.Weights.reserve(NumRegisters); 792 Iter.PrecoloredUnhandledMask.reserve(NumRegisters); 793 794 while (!Unhandled.empty()) { 795 Iter.Cur = Unhandled.back(); 796 Unhandled.pop_back(); 797 dumpLiveRangeTrace("\nConsidering ", Iter.Cur); 798 Iter.RegMask = 799 RegMaskFull & Target->getRegisterSetForType(Iter.Cur->getType()); 800 KillsRange.trim(Iter.Cur->getLiveRange().getStart()); 801 802 // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets 803 // that register. Previously processed live ranges would have avoided that 804 // register due to it being pre-colored. Future processed live ranges won't 805 // evict that register because the live range has infinite weight. 806 if (Iter.Cur->hasReg()) { 807 allocatePrecoloredRegister(Iter.Cur); 808 continue; 809 } 810 811 handleActiveRangeExpiredOrInactive(Iter.Cur); 812 handleInactiveRangeExpiredOrReactivated(Iter.Cur); 813 814 // Calculate available registers into Free[]. 815 Iter.Free = Iter.RegMask; 816 for (SizeT i = 0; i < Iter.RegMask.size(); ++i) { 817 if (RegUses[i] > 0) 818 Iter.Free[i] = false; 819 } 820 821 findRegisterPreference(Iter); 822 filterFreeWithInactiveRanges(Iter); 823 824 // Disable AllowOverlap if an Active variable, which is not Prefer, shares 825 // Prefer's register, and has a definition within Cur's live range. 826 if (Iter.AllowOverlap) { 827 for (const Variable *Item : Active) { 828 int32_t RegNum = Item->getRegNumTmp(); 829 if (Item != Iter.Prefer && RegNum == Iter.PreferReg && 830 overlapsDefs(Func, Iter.Cur, Item)) { 831 Iter.AllowOverlap = false; 832 dumpDisableOverlap(Func, Item, "Active"); 833 } 834 } 835 } 836 837 Iter.Weights.resize(Iter.RegMask.size()); 838 std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight()); 839 840 Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size()); 841 Iter.PrecoloredUnhandledMask.reset(); 842 843 filterFreeWithPrecoloredRanges(Iter); 844 845 // Remove scratch registers from the Free[] list, and mark their Weights[] 846 // as infinite, if KillsRange overlaps Cur's live range. 847 constexpr bool UseTrimmed = true; 848 if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) { 849 Iter.Free.reset(KillsMask); 850 for (int i = KillsMask.find_first(); i != -1; 851 i = KillsMask.find_next(i)) { 852 Iter.Weights[i].setWeight(RegWeight::Inf); 853 if (Iter.PreferReg == i) 854 Iter.AllowOverlap = false; 855 } 856 } 857 858 // Print info about physical register availability. 859 if (Verbose) { 860 Ostream &Str = Ctx->getStrDump(); 861 for (SizeT i = 0; i < Iter.RegMask.size(); ++i) { 862 if (Iter.RegMask[i]) { 863 Str << Target->getRegName(i, IceType_i32) << "(U=" << RegUses[i] 864 << ",F=" << Iter.Free[i] 865 << ",P=" << Iter.PrecoloredUnhandledMask[i] << ") "; 866 } 867 } 868 Str << "\n"; 869 } 870 871 if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) { 872 // First choice: a preferred register that is either free or is allowed 873 // to overlap with its linked variable. 874 allocatePreferredRegister(Iter); 875 } else if (Iter.Free.any()) { 876 // Second choice: any free register. 877 allocateFreeRegister(Iter); 878 } else { 879 // Fallback: there are no free registers, so we look for the 880 // lowest-weight register and see if Cur has higher weight. 881 handleNoFreeRegisters(Iter); 882 } 883 dump(Func); 884 } 885 886 // Move anything Active or Inactive to Handled for easier handling. 887 Handled.insert(Handled.end(), Active.begin(), Active.end()); 888 Active.clear(); 889 Handled.insert(Handled.end(), Inactive.begin(), Inactive.end()); 890 Inactive.clear(); 891 dump(Func); 892 893 assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized); 894 895 // TODO: Consider running register allocation one more time, with infinite 896 // registers, for two reasons. First, evicted live ranges get a second chance 897 // for a register. Second, it allows coalescing of stack slots. If there is 898 // no time budget for the second register allocation run, each unallocated 899 // variable just gets its own slot. 900 // 901 // Another idea for coalescing stack slots is to initialize the Unhandled 902 // list with just the unallocated variables, saving time but not offering 903 // second-chance opportunities. 904 905 if (Verbose) 906 Ctx->unlockStr(); 907} 908 909// ======================== Dump routines ======================== // 910 911void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) { 912 if (!BuildDefs::dump()) 913 return; 914 915 if (Verbose) { 916 Ostream &Str = Ctx->getStrDump(); 917 Str << Label; 918 dumpLiveRange(Item, Func); 919 Str << "\n"; 920 } 921} 922 923void LinearScan::dump(Cfg *Func) const { 924 if (!BuildDefs::dump()) 925 return; 926 if (!Func->isVerbose(IceV_LinearScan)) 927 return; 928 Ostream &Str = Func->getContext()->getStrDump(); 929 Func->resetCurrentNode(); 930 Str << "**** Current regalloc state:\n"; 931 Str << "++++++ Handled:\n"; 932 for (const Variable *Item : Handled) { 933 dumpLiveRange(Item, Func); 934 Str << "\n"; 935 } 936 Str << "++++++ Unhandled:\n"; 937 for (const Variable *Item : reverse_range(Unhandled)) { 938 dumpLiveRange(Item, Func); 939 Str << "\n"; 940 } 941 Str << "++++++ Active:\n"; 942 for (const Variable *Item : Active) { 943 dumpLiveRange(Item, Func); 944 Str << "\n"; 945 } 946 Str << "++++++ Inactive:\n"; 947 for (const Variable *Item : Inactive) { 948 dumpLiveRange(Item, Func); 949 Str << "\n"; 950 } 951} 952 953} // end of namespace Ice 954