ExecutionDepsFix.cpp revision a59ce0379134b249a3c949f7dcd6ec3566c4d7e3
1//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains the execution dependency fix pass. 11// 12// Some X86 SSE instructions like mov, and, or, xor are available in different 13// variants for different operand types. These variant instructions are 14// equivalent, but on Nehalem and newer cpus there is extra latency 15// transferring data between integer and floating point domains. ARM cores 16// have similar issues when they are configured with both VFP and NEON 17// pipelines. 18// 19// This pass changes the variant instructions to minimize domain crossings. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "execution-fix" 24#include "llvm/CodeGen/MachineFunctionPass.h" 25#include "llvm/CodeGen/MachineRegisterInfo.h" 26#include "llvm/CodeGen/Passes.h" 27#include "llvm/Target/TargetInstrInfo.h" 28#include "llvm/Target/TargetMachine.h" 29#include "llvm/ADT/PostOrderIterator.h" 30#include "llvm/Support/Allocator.h" 31#include "llvm/Support/Debug.h" 32#include "llvm/Support/raw_ostream.h" 33using namespace llvm; 34 35/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track 36/// of execution domains. 37/// 38/// An open DomainValue represents a set of instructions that can still switch 39/// execution domain. Multiple registers may refer to the same open 40/// DomainValue - they will eventually be collapsed to the same execution 41/// domain. 42/// 43/// A collapsed DomainValue represents a single register that has been forced 44/// into one of more execution domains. There is a separate collapsed 45/// DomainValue for each register, but it may contain multiple execution 46/// domains. A register value is initially created in a single execution 47/// domain, but if we were forced to pay the penalty of a domain crossing, we 48/// keep track of the fact the the register is now available in multiple 49/// domains. 50namespace { 51struct DomainValue { 52 // Basic reference counting. 53 unsigned Refs; 54 55 // Bitmask of available domains. For an open DomainValue, it is the still 56 // possible domains for collapsing. For a collapsed DomainValue it is the 57 // domains where the register is available for free. 58 unsigned AvailableDomains; 59 60 // Position of the last defining instruction. 61 unsigned Dist; 62 63 // Twiddleable instructions using or defining these registers. 64 SmallVector<MachineInstr*, 8> Instrs; 65 66 // A collapsed DomainValue has no instructions to twiddle - it simply keeps 67 // track of the domains where the registers are already available. 68 bool isCollapsed() const { return Instrs.empty(); } 69 70 // Is domain available? 71 bool hasDomain(unsigned domain) const { 72 return AvailableDomains & (1u << domain); 73 } 74 75 // Mark domain as available. 76 void addDomain(unsigned domain) { 77 AvailableDomains |= 1u << domain; 78 } 79 80 // Restrict to a single domain available. 81 void setSingleDomain(unsigned domain) { 82 AvailableDomains = 1u << domain; 83 } 84 85 // Return bitmask of domains that are available and in mask. 86 unsigned getCommonDomains(unsigned mask) const { 87 return AvailableDomains & mask; 88 } 89 90 // First domain available. 91 unsigned getFirstDomain() const { 92 return CountTrailingZeros_32(AvailableDomains); 93 } 94 95 DomainValue() { clear(); } 96 97 void clear() { 98 Refs = AvailableDomains = Dist = 0; 99 Instrs.clear(); 100 } 101}; 102} 103 104namespace { 105class ExeDepsFix : public MachineFunctionPass { 106 static char ID; 107 SpecificBumpPtrAllocator<DomainValue> Allocator; 108 SmallVector<DomainValue*,16> Avail; 109 110 const TargetRegisterClass *const RC; 111 MachineFunction *MF; 112 const TargetInstrInfo *TII; 113 const TargetRegisterInfo *TRI; 114 std::vector<int> AliasMap; 115 const unsigned NumRegs; 116 DomainValue **LiveRegs; 117 typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; 118 LiveOutMap LiveOuts; 119 unsigned Distance; 120 121public: 122 ExeDepsFix(const TargetRegisterClass *rc) 123 : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {} 124 125 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 126 AU.setPreservesAll(); 127 MachineFunctionPass::getAnalysisUsage(AU); 128 } 129 130 virtual bool runOnMachineFunction(MachineFunction &MF); 131 132 virtual const char *getPassName() const { 133 return "Execution dependency fix"; 134 } 135 136private: 137 // Register mapping. 138 int RegIndex(unsigned Reg); 139 140 // DomainValue allocation. 141 DomainValue *Alloc(int domain = -1); 142 void Recycle(DomainValue*); 143 144 // LiveRegs manipulations. 145 void SetLiveReg(int rx, DomainValue *DV); 146 void Kill(int rx); 147 void Force(int rx, unsigned domain); 148 void Collapse(DomainValue *dv, unsigned domain); 149 bool Merge(DomainValue *A, DomainValue *B); 150 151 void enterBasicBlock(MachineBasicBlock*); 152 void leaveBasicBlock(MachineBasicBlock*); 153 void visitInstr(MachineInstr*); 154 void visitGenericInstr(MachineInstr*); 155 void visitSoftInstr(MachineInstr*, unsigned mask); 156 void visitHardInstr(MachineInstr*, unsigned domain); 157}; 158} 159 160char ExeDepsFix::ID = 0; 161 162/// Translate TRI register number to an index into our smaller tables of 163/// interesting registers. Return -1 for boring registers. 164int ExeDepsFix::RegIndex(unsigned Reg) { 165 assert(Reg < AliasMap.size() && "Invalid register"); 166 return AliasMap[Reg]; 167} 168 169DomainValue *ExeDepsFix::Alloc(int domain) { 170 DomainValue *dv = Avail.empty() ? 171 new(Allocator.Allocate()) DomainValue : 172 Avail.pop_back_val(); 173 dv->Dist = Distance; 174 if (domain >= 0) 175 dv->addDomain(domain); 176 return dv; 177} 178 179void ExeDepsFix::Recycle(DomainValue *dv) { 180 assert(dv && "Cannot recycle NULL"); 181 dv->clear(); 182 Avail.push_back(dv); 183} 184 185/// Set LiveRegs[rx] = dv, updating reference counts. 186void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) { 187 assert(unsigned(rx) < NumRegs && "Invalid index"); 188 if (!LiveRegs) { 189 LiveRegs = new DomainValue*[NumRegs]; 190 std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); 191 } 192 193 if (LiveRegs[rx] == dv) 194 return; 195 if (LiveRegs[rx]) { 196 assert(LiveRegs[rx]->Refs && "Bad refcount"); 197 if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]); 198 } 199 LiveRegs[rx] = dv; 200 if (dv) ++dv->Refs; 201} 202 203// Kill register rx, recycle or collapse any DomainValue. 204void ExeDepsFix::Kill(int rx) { 205 assert(unsigned(rx) < NumRegs && "Invalid index"); 206 if (!LiveRegs || !LiveRegs[rx]) return; 207 208 // Before killing the last reference to an open DomainValue, collapse it to 209 // the first available domain. 210 if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed()) 211 Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain()); 212 else 213 SetLiveReg(rx, 0); 214} 215 216/// Force register rx into domain. 217void ExeDepsFix::Force(int rx, unsigned domain) { 218 assert(unsigned(rx) < NumRegs && "Invalid index"); 219 DomainValue *dv; 220 if (LiveRegs && (dv = LiveRegs[rx])) { 221 if (dv->isCollapsed()) 222 dv->addDomain(domain); 223 else if (dv->hasDomain(domain)) 224 Collapse(dv, domain); 225 else { 226 // This is an incompatible open DomainValue. Collapse it to whatever and 227 // force the new value into domain. This costs a domain crossing. 228 Collapse(dv, dv->getFirstDomain()); 229 assert(LiveRegs[rx] && "Not live after collapse?"); 230 LiveRegs[rx]->addDomain(domain); 231 } 232 } else { 233 // Set up basic collapsed DomainValue. 234 SetLiveReg(rx, Alloc(domain)); 235 } 236} 237 238/// Collapse open DomainValue into given domain. If there are multiple 239/// registers using dv, they each get a unique collapsed DomainValue. 240void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) { 241 assert(dv->hasDomain(domain) && "Cannot collapse"); 242 243 // Collapse all the instructions. 244 while (!dv->Instrs.empty()) 245 TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain); 246 dv->setSingleDomain(domain); 247 248 // If there are multiple users, give them new, unique DomainValues. 249 if (LiveRegs && dv->Refs > 1) 250 for (unsigned rx = 0; rx != NumRegs; ++rx) 251 if (LiveRegs[rx] == dv) 252 SetLiveReg(rx, Alloc(domain)); 253} 254 255/// Merge - All instructions and registers in B are moved to A, and B is 256/// released. 257bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { 258 assert(!A->isCollapsed() && "Cannot merge into collapsed"); 259 assert(!B->isCollapsed() && "Cannot merge from collapsed"); 260 if (A == B) 261 return true; 262 // Restrict to the domains that A and B have in common. 263 unsigned common = A->getCommonDomains(B->AvailableDomains); 264 if (!common) 265 return false; 266 A->AvailableDomains = common; 267 A->Dist = std::max(A->Dist, B->Dist); 268 A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); 269 for (unsigned rx = 0; rx != NumRegs; ++rx) 270 if (LiveRegs[rx] == B) 271 SetLiveReg(rx, A); 272 return true; 273} 274 275void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { 276 // Try to coalesce live-out registers from predecessors. 277 for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), 278 e = MBB->livein_end(); i != e; ++i) { 279 int rx = RegIndex(*i); 280 if (rx < 0) continue; 281 for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), 282 pe = MBB->pred_end(); pi != pe; ++pi) { 283 LiveOutMap::const_iterator fi = LiveOuts.find(*pi); 284 if (fi == LiveOuts.end()) continue; 285 DomainValue *pdv = fi->second[rx]; 286 if (!pdv) continue; 287 if (!LiveRegs || !LiveRegs[rx]) { 288 SetLiveReg(rx, pdv); 289 continue; 290 } 291 292 // We have a live DomainValue from more than one predecessor. 293 if (LiveRegs[rx]->isCollapsed()) { 294 // We are already collapsed, but predecessor is not. Force him. 295 unsigned domain = LiveRegs[rx]->getFirstDomain(); 296 if (!pdv->isCollapsed() && pdv->hasDomain(domain)) 297 Collapse(pdv, domain); 298 continue; 299 } 300 301 // Currently open, merge in predecessor. 302 if (!pdv->isCollapsed()) 303 Merge(LiveRegs[rx], pdv); 304 else 305 Force(rx, pdv->getFirstDomain()); 306 } 307 } 308} 309 310void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { 311 // Save live registers at end of MBB - used by enterBasicBlock(). 312 if (LiveRegs) 313 LiveOuts.insert(std::make_pair(MBB, LiveRegs)); 314 LiveRegs = 0; 315} 316 317void ExeDepsFix::visitInstr(MachineInstr *MI) { 318 if (MI->isDebugValue()) 319 return; 320 ++Distance; 321 std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(MI); 322 if (domp.first) 323 if (domp.second) 324 visitSoftInstr(MI, domp.second); 325 else 326 visitHardInstr(MI, domp.first); 327 else if (LiveRegs) 328 visitGenericInstr(MI); 329} 330 331// A hard instruction only works in one domain. All input registers will be 332// forced into that domain. 333void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { 334 // Collapse all uses. 335 for (unsigned i = mi->getDesc().getNumDefs(), 336 e = mi->getDesc().getNumOperands(); i != e; ++i) { 337 MachineOperand &mo = mi->getOperand(i); 338 if (!mo.isReg()) continue; 339 int rx = RegIndex(mo.getReg()); 340 if (rx < 0) continue; 341 Force(rx, domain); 342 } 343 344 // Kill all defs and force them. 345 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { 346 MachineOperand &mo = mi->getOperand(i); 347 if (!mo.isReg()) continue; 348 int rx = RegIndex(mo.getReg()); 349 if (rx < 0) continue; 350 Kill(rx); 351 Force(rx, domain); 352 } 353} 354 355// A soft instruction can be changed to work in other domains given by mask. 356void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { 357 // Bitmask of available domains for this instruction after taking collapsed 358 // operands into account. 359 unsigned available = mask; 360 361 // Scan the explicit use operands for incoming domains. 362 SmallVector<int, 4> used; 363 if (LiveRegs) 364 for (unsigned i = mi->getDesc().getNumDefs(), 365 e = mi->getDesc().getNumOperands(); i != e; ++i) { 366 MachineOperand &mo = mi->getOperand(i); 367 if (!mo.isReg()) continue; 368 int rx = RegIndex(mo.getReg()); 369 if (rx < 0) continue; 370 if (DomainValue *dv = LiveRegs[rx]) { 371 // Bitmask of domains that dv and available have in common. 372 unsigned common = dv->getCommonDomains(available); 373 // Is it possible to use this collapsed register for free? 374 if (dv->isCollapsed()) { 375 // Restrict available domains to the ones in common with the operand. 376 // If there are no common domains, we must pay the cross-domain 377 // penalty for this operand. 378 if (common) available = common; 379 } else if (common) 380 // Open DomainValue is compatible, save it for merging. 381 used.push_back(rx); 382 else 383 // Open DomainValue is not compatible with instruction. It is useless 384 // now. 385 Kill(rx); 386 } 387 } 388 389 // If the collapsed operands force a single domain, propagate the collapse. 390 if (isPowerOf2_32(available)) { 391 unsigned domain = CountTrailingZeros_32(available); 392 TII->setExecutionDomain(mi, domain); 393 visitHardInstr(mi, domain); 394 return; 395 } 396 397 // Kill off any remaining uses that don't match available, and build a list of 398 // incoming DomainValues that we want to merge. 399 SmallVector<DomainValue*,4> doms; 400 for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { 401 int rx = *i; 402 DomainValue *dv = LiveRegs[rx]; 403 // This useless DomainValue could have been missed above. 404 if (!dv->getCommonDomains(available)) { 405 Kill(*i); 406 continue; 407 } 408 // sorted, uniqued insert. 409 bool inserted = false; 410 for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end(); 411 i != e && !inserted; ++i) { 412 if (dv == *i) 413 inserted = true; 414 else if (dv->Dist < (*i)->Dist) { 415 inserted = true; 416 doms.insert(i, dv); 417 } 418 } 419 if (!inserted) 420 doms.push_back(dv); 421 } 422 423 // doms are now sorted in order of appearance. Try to merge them all, giving 424 // priority to the latest ones. 425 DomainValue *dv = 0; 426 while (!doms.empty()) { 427 if (!dv) { 428 dv = doms.pop_back_val(); 429 continue; 430 } 431 432 DomainValue *latest = doms.pop_back_val(); 433 if (Merge(dv, latest)) continue; 434 435 // If latest didn't merge, it is useless now. Kill all registers using it. 436 for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) 437 if (LiveRegs[*i] == latest) 438 Kill(*i); 439 } 440 441 // dv is the DomainValue we are going to use for this instruction. 442 if (!dv) 443 dv = Alloc(); 444 dv->Dist = Distance; 445 dv->AvailableDomains = available; 446 dv->Instrs.push_back(mi); 447 448 // Finally set all defs and non-collapsed uses to dv. 449 for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { 450 MachineOperand &mo = mi->getOperand(i); 451 if (!mo.isReg()) continue; 452 int rx = RegIndex(mo.getReg()); 453 if (rx < 0) continue; 454 if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { 455 Kill(rx); 456 SetLiveReg(rx, dv); 457 } 458 } 459} 460 461void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { 462 // Process explicit defs, kill any relevant registers redefined. 463 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { 464 MachineOperand &mo = mi->getOperand(i); 465 if (!mo.isReg()) continue; 466 int rx = RegIndex(mo.getReg()); 467 if (rx < 0) continue; 468 Kill(rx); 469 } 470} 471 472bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { 473 MF = &mf; 474 TII = MF->getTarget().getInstrInfo(); 475 TRI = MF->getTarget().getRegisterInfo(); 476 LiveRegs = 0; 477 Distance = 0; 478 assert(NumRegs == RC->getNumRegs() && "Bad regclass"); 479 480 // If no relevant registers are used in the function, we can skip it 481 // completely. 482 bool anyregs = false; 483 for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); 484 I != E; ++I) 485 if (MF->getRegInfo().isPhysRegUsed(*I)) { 486 anyregs = true; 487 break; 488 } 489 if (!anyregs) return false; 490 491 // Initialize the AliasMap on the first use. 492 if (AliasMap.empty()) { 493 // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC, 494 // or -1. 495 AliasMap.resize(TRI->getNumRegs(), -1); 496 for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) 497 for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) 498 AliasMap[*AI] = i; 499 } 500 501 MachineBasicBlock *Entry = MF->begin(); 502 ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); 503 for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator 504 MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { 505 MachineBasicBlock *MBB = *MBBI; 506 enterBasicBlock(MBB); 507 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; 508 ++I) 509 visitInstr(I); 510 leaveBasicBlock(MBB); 511 } 512 513 // Clear the LiveOuts vectors. Should we also collapse any remaining 514 // DomainValues? 515 for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); 516 i != e; ++i) 517 delete[] i->second; 518 LiveOuts.clear(); 519 Avail.clear(); 520 Allocator.DestroyAll(); 521 522 return false; 523} 524 525FunctionPass * 526llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) { 527 return new ExeDepsFix(RC); 528} 529