ExecutionDepsFix.cpp revision fc7b08da019fcb14f7ca3f0db77b10384809fd8b
1//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains the execution dependency fix pass. 11// 12// Some X86 SSE instructions like mov, and, or, xor are available in different 13// variants for different operand types. These variant instructions are 14// equivalent, but on Nehalem and newer cpus there is extra latency 15// transferring data between integer and floating point domains. ARM cores 16// have similar issues when they are configured with both VFP and NEON 17// pipelines. 18// 19// This pass changes the variant instructions to minimize domain crossings. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "execution-fix" 24#include "llvm/CodeGen/MachineFunctionPass.h" 25#include "llvm/CodeGen/MachineRegisterInfo.h" 26#include "llvm/CodeGen/Passes.h" 27#include "llvm/Target/TargetInstrInfo.h" 28#include "llvm/Target/TargetMachine.h" 29#include "llvm/ADT/PostOrderIterator.h" 30#include "llvm/Support/Allocator.h" 31#include "llvm/Support/Debug.h" 32#include "llvm/Support/raw_ostream.h" 33using namespace llvm; 34 35/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track 36/// of execution domains. 37/// 38/// An open DomainValue represents a set of instructions that can still switch 39/// execution domain. Multiple registers may refer to the same open 40/// DomainValue - they will eventually be collapsed to the same execution 41/// domain. 42/// 43/// A collapsed DomainValue represents a single register that has been forced 44/// into one of more execution domains. There is a separate collapsed 45/// DomainValue for each register, but it may contain multiple execution 46/// domains. A register value is initially created in a single execution 47/// domain, but if we were forced to pay the penalty of a domain crossing, we 48/// keep track of the fact the the register is now available in multiple 49/// domains. 50namespace { 51struct DomainValue { 52 // Basic reference counting. 53 unsigned Refs; 54 55 // Bitmask of available domains. For an open DomainValue, it is the still 56 // possible domains for collapsing. For a collapsed DomainValue it is the 57 // domains where the register is available for free. 58 unsigned AvailableDomains; 59 60 // Position of the last defining instruction. 61 unsigned Dist; 62 63 // Pointer to the next DomainValue in a chain. When two DomainValues are 64 // merged, Victim.Next is set to point to Victor, so old DomainValue 65 // references can be updated by folowing the chain. 66 DomainValue *Next; 67 68 // Twiddleable instructions using or defining these registers. 69 SmallVector<MachineInstr*, 8> Instrs; 70 71 // A collapsed DomainValue has no instructions to twiddle - it simply keeps 72 // track of the domains where the registers are already available. 73 bool isCollapsed() const { return Instrs.empty(); } 74 75 // Is domain available? 76 bool hasDomain(unsigned domain) const { 77 return AvailableDomains & (1u << domain); 78 } 79 80 // Mark domain as available. 81 void addDomain(unsigned domain) { 82 AvailableDomains |= 1u << domain; 83 } 84 85 // Restrict to a single domain available. 86 void setSingleDomain(unsigned domain) { 87 AvailableDomains = 1u << domain; 88 } 89 90 // Return bitmask of domains that are available and in mask. 91 unsigned getCommonDomains(unsigned mask) const { 92 return AvailableDomains & mask; 93 } 94 95 // First domain available. 96 unsigned getFirstDomain() const { 97 return CountTrailingZeros_32(AvailableDomains); 98 } 99 100 DomainValue() : Refs(0) { clear(); } 101 102 // Clear this DomainValue and point to next which has all its data. 103 void clear() { 104 AvailableDomains = Dist = 0; 105 Next = 0; 106 Instrs.clear(); 107 } 108}; 109} 110 111namespace { 112class ExeDepsFix : public MachineFunctionPass { 113 static char ID; 114 SpecificBumpPtrAllocator<DomainValue> Allocator; 115 SmallVector<DomainValue*,16> Avail; 116 117 const TargetRegisterClass *const RC; 118 MachineFunction *MF; 119 const TargetInstrInfo *TII; 120 const TargetRegisterInfo *TRI; 121 std::vector<int> AliasMap; 122 const unsigned NumRegs; 123 DomainValue **LiveRegs; 124 typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; 125 LiveOutMap LiveOuts; 126 unsigned Distance; 127 128public: 129 ExeDepsFix(const TargetRegisterClass *rc) 130 : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {} 131 132 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 133 AU.setPreservesAll(); 134 MachineFunctionPass::getAnalysisUsage(AU); 135 } 136 137 virtual bool runOnMachineFunction(MachineFunction &MF); 138 139 virtual const char *getPassName() const { 140 return "Execution dependency fix"; 141 } 142 143private: 144 // Register mapping. 145 int regIndex(unsigned Reg); 146 147 // DomainValue allocation. 148 DomainValue *alloc(int domain = -1); 149 DomainValue *retain(DomainValue *DV) { 150 if (DV) ++DV->Refs; 151 return DV; 152 } 153 void release(DomainValue*); 154 DomainValue *resolve(DomainValue*&); 155 156 // LiveRegs manipulations. 157 void setLiveReg(int rx, DomainValue *DV); 158 void kill(int rx); 159 void force(int rx, unsigned domain); 160 void collapse(DomainValue *dv, unsigned domain); 161 bool merge(DomainValue *A, DomainValue *B); 162 163 bool enterBasicBlock(MachineBasicBlock*); 164 void leaveBasicBlock(MachineBasicBlock*); 165 void visitInstr(MachineInstr*); 166 void visitGenericInstr(MachineInstr*); 167 void visitSoftInstr(MachineInstr*, unsigned mask); 168 void visitHardInstr(MachineInstr*, unsigned domain); 169}; 170} 171 172char ExeDepsFix::ID = 0; 173 174/// Translate TRI register number to an index into our smaller tables of 175/// interesting registers. Return -1 for boring registers. 176int ExeDepsFix::regIndex(unsigned Reg) { 177 assert(Reg < AliasMap.size() && "Invalid register"); 178 return AliasMap[Reg]; 179} 180 181DomainValue *ExeDepsFix::alloc(int domain) { 182 DomainValue *dv = Avail.empty() ? 183 new(Allocator.Allocate()) DomainValue : 184 Avail.pop_back_val(); 185 dv->Dist = Distance; 186 if (domain >= 0) 187 dv->addDomain(domain); 188 assert(dv->Refs == 0 && "Reference count wasn't cleared"); 189 assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); 190 return dv; 191} 192 193/// release - Release a reference to DV. When the last reference is released, 194/// collapse if needed. 195void ExeDepsFix::release(DomainValue *DV) { 196 while (DV) { 197 assert(DV->Refs && "Bad DomainValue"); 198 if (--DV->Refs) 199 return; 200 201 // There are no more DV references. Collapse any contained instructions. 202 if (DV->AvailableDomains && !DV->isCollapsed()) 203 collapse(DV, DV->getFirstDomain()); 204 205 DomainValue *Next = DV->Next; 206 DV->clear(); 207 Avail.push_back(DV); 208 // Also release the next DomainValue in the chain. 209 DV = Next; 210 } 211} 212 213/// resolve - Follow the chain of dead DomainValues until a live DomainValue is 214/// reached. Update the referenced pointer when necessary. 215DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { 216 DomainValue *DV = DVRef; 217 if (!DV || !DV->Next) 218 return DV; 219 220 // DV has a chain. Find the end. 221 do DV = DV->Next; 222 while (DV->Next); 223 224 // Update DVRef to point to DV. 225 retain(DV); 226 release(DVRef); 227 DVRef = DV; 228 return DV; 229} 230 231/// Set LiveRegs[rx] = dv, updating reference counts. 232void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { 233 assert(unsigned(rx) < NumRegs && "Invalid index"); 234 if (!LiveRegs) { 235 LiveRegs = new DomainValue*[NumRegs]; 236 std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); 237 } 238 239 if (LiveRegs[rx] == dv) 240 return; 241 if (LiveRegs[rx]) 242 release(LiveRegs[rx]); 243 LiveRegs[rx] = retain(dv); 244} 245 246// Kill register rx, recycle or collapse any DomainValue. 247void ExeDepsFix::kill(int rx) { 248 assert(unsigned(rx) < NumRegs && "Invalid index"); 249 if (!LiveRegs || !LiveRegs[rx]) return; 250 251 release(LiveRegs[rx]); 252 LiveRegs[rx] = 0; 253} 254 255/// Force register rx into domain. 256void ExeDepsFix::force(int rx, unsigned domain) { 257 assert(unsigned(rx) < NumRegs && "Invalid index"); 258 DomainValue *dv; 259 if (LiveRegs && (dv = LiveRegs[rx])) { 260 if (dv->isCollapsed()) 261 dv->addDomain(domain); 262 else if (dv->hasDomain(domain)) 263 collapse(dv, domain); 264 else { 265 // This is an incompatible open DomainValue. Collapse it to whatever and 266 // force the new value into domain. This costs a domain crossing. 267 collapse(dv, dv->getFirstDomain()); 268 assert(LiveRegs[rx] && "Not live after collapse?"); 269 LiveRegs[rx]->addDomain(domain); 270 } 271 } else { 272 // Set up basic collapsed DomainValue. 273 setLiveReg(rx, alloc(domain)); 274 } 275} 276 277/// Collapse open DomainValue into given domain. If there are multiple 278/// registers using dv, they each get a unique collapsed DomainValue. 279void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { 280 assert(dv->hasDomain(domain) && "Cannot collapse"); 281 282 // Collapse all the instructions. 283 while (!dv->Instrs.empty()) 284 TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain); 285 dv->setSingleDomain(domain); 286 287 // If there are multiple users, give them new, unique DomainValues. 288 if (LiveRegs && dv->Refs > 1) 289 for (unsigned rx = 0; rx != NumRegs; ++rx) 290 if (LiveRegs[rx] == dv) 291 setLiveReg(rx, alloc(domain)); 292} 293 294/// Merge - All instructions and registers in B are moved to A, and B is 295/// released. 296bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { 297 assert(!A->isCollapsed() && "Cannot merge into collapsed"); 298 assert(!B->isCollapsed() && "Cannot merge from collapsed"); 299 if (A == B) 300 return true; 301 // Restrict to the domains that A and B have in common. 302 unsigned common = A->getCommonDomains(B->AvailableDomains); 303 if (!common) 304 return false; 305 A->AvailableDomains = common; 306 A->Dist = std::max(A->Dist, B->Dist); 307 A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); 308 309 // Clear the old DomainValue so we won't try to swizzle instructions twice. 310 B->clear(); 311 // All uses of B are referred to A. 312 B->Next = retain(A); 313 314 for (unsigned rx = 0; rx != NumRegs; ++rx) 315 if (LiveRegs[rx] == B) 316 setLiveReg(rx, A); 317 return true; 318} 319 320// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. 321// Return true if some predecessor hasn't been processed yet (like on a loop 322// back-edge). 323bool ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { 324 // Detect back-edges from predecessors we haven't processed yet. 325 bool seenBackEdge = false; 326 327 // Try to coalesce live-out registers from predecessors. 328 for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), 329 e = MBB->livein_end(); i != e; ++i) { 330 int rx = regIndex(*i); 331 if (rx < 0) continue; 332 for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), 333 pe = MBB->pred_end(); pi != pe; ++pi) { 334 LiveOutMap::const_iterator fi = LiveOuts.find(*pi); 335 if (fi == LiveOuts.end()) { 336 seenBackEdge = true; 337 continue; 338 } 339 if (!fi->second) 340 continue; 341 DomainValue *pdv = resolve(fi->second[rx]); 342 if (!pdv) continue; 343 if (!LiveRegs || !LiveRegs[rx]) { 344 setLiveReg(rx, pdv); 345 continue; 346 } 347 348 // We have a live DomainValue from more than one predecessor. 349 if (LiveRegs[rx]->isCollapsed()) { 350 // We are already collapsed, but predecessor is not. Force him. 351 unsigned domain = LiveRegs[rx]->getFirstDomain(); 352 if (!pdv->isCollapsed() && pdv->hasDomain(domain)) 353 collapse(pdv, domain); 354 continue; 355 } 356 357 // Currently open, merge in predecessor. 358 if (!pdv->isCollapsed()) 359 merge(LiveRegs[rx], pdv); 360 else 361 force(rx, pdv->getFirstDomain()); 362 } 363 } 364 return seenBackEdge; 365} 366 367void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { 368 // Save live registers at end of MBB - used by enterBasicBlock(). 369 // Also use LiveOuts as a visited set to detect back-edges. 370 if (!LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second && LiveRegs) { 371 // Insertion failed, this must be the second pass. 372 // Release all the DomainValues instead of keeping them. 373 for (unsigned i = 0, e = NumRegs; i != e; ++i) 374 release(LiveRegs[i]); 375 delete[] LiveRegs; 376 } 377 LiveRegs = 0; 378} 379 380void ExeDepsFix::visitInstr(MachineInstr *MI) { 381 if (MI->isDebugValue()) 382 return; 383 ++Distance; 384 std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(MI); 385 if (domp.first) 386 if (domp.second) 387 visitSoftInstr(MI, domp.second); 388 else 389 visitHardInstr(MI, domp.first); 390 else if (LiveRegs) 391 visitGenericInstr(MI); 392} 393 394// A hard instruction only works in one domain. All input registers will be 395// forced into that domain. 396void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { 397 // Collapse all uses. 398 for (unsigned i = mi->getDesc().getNumDefs(), 399 e = mi->getDesc().getNumOperands(); i != e; ++i) { 400 MachineOperand &mo = mi->getOperand(i); 401 if (!mo.isReg()) continue; 402 int rx = regIndex(mo.getReg()); 403 if (rx < 0) continue; 404 force(rx, domain); 405 } 406 407 // Kill all defs and force them. 408 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { 409 MachineOperand &mo = mi->getOperand(i); 410 if (!mo.isReg()) continue; 411 int rx = regIndex(mo.getReg()); 412 if (rx < 0) continue; 413 kill(rx); 414 force(rx, domain); 415 } 416} 417 418// A soft instruction can be changed to work in other domains given by mask. 419void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { 420 // Bitmask of available domains for this instruction after taking collapsed 421 // operands into account. 422 unsigned available = mask; 423 424 // Scan the explicit use operands for incoming domains. 425 SmallVector<int, 4> used; 426 if (LiveRegs) 427 for (unsigned i = mi->getDesc().getNumDefs(), 428 e = mi->getDesc().getNumOperands(); i != e; ++i) { 429 MachineOperand &mo = mi->getOperand(i); 430 if (!mo.isReg()) continue; 431 int rx = regIndex(mo.getReg()); 432 if (rx < 0) continue; 433 if (DomainValue *dv = LiveRegs[rx]) { 434 // Bitmask of domains that dv and available have in common. 435 unsigned common = dv->getCommonDomains(available); 436 // Is it possible to use this collapsed register for free? 437 if (dv->isCollapsed()) { 438 // Restrict available domains to the ones in common with the operand. 439 // If there are no common domains, we must pay the cross-domain 440 // penalty for this operand. 441 if (common) available = common; 442 } else if (common) 443 // Open DomainValue is compatible, save it for merging. 444 used.push_back(rx); 445 else 446 // Open DomainValue is not compatible with instruction. It is useless 447 // now. 448 kill(rx); 449 } 450 } 451 452 // If the collapsed operands force a single domain, propagate the collapse. 453 if (isPowerOf2_32(available)) { 454 unsigned domain = CountTrailingZeros_32(available); 455 TII->setExecutionDomain(mi, domain); 456 visitHardInstr(mi, domain); 457 return; 458 } 459 460 // Kill off any remaining uses that don't match available, and build a list of 461 // incoming DomainValues that we want to merge. 462 SmallVector<DomainValue*,4> doms; 463 for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { 464 int rx = *i; 465 DomainValue *dv = LiveRegs[rx]; 466 // This useless DomainValue could have been missed above. 467 if (!dv->getCommonDomains(available)) { 468 kill(*i); 469 continue; 470 } 471 // sorted, uniqued insert. 472 bool inserted = false; 473 for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end(); 474 i != e && !inserted; ++i) { 475 if (dv == *i) 476 inserted = true; 477 else if (dv->Dist < (*i)->Dist) { 478 inserted = true; 479 doms.insert(i, dv); 480 } 481 } 482 if (!inserted) 483 doms.push_back(dv); 484 } 485 486 // doms are now sorted in order of appearance. Try to merge them all, giving 487 // priority to the latest ones. 488 DomainValue *dv = 0; 489 while (!doms.empty()) { 490 if (!dv) { 491 dv = doms.pop_back_val(); 492 continue; 493 } 494 495 DomainValue *latest = doms.pop_back_val(); 496 if (merge(dv, latest)) continue; 497 498 // If latest didn't merge, it is useless now. Kill all registers using it. 499 for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) 500 if (LiveRegs[*i] == latest) 501 kill(*i); 502 } 503 504 // dv is the DomainValue we are going to use for this instruction. 505 if (!dv) 506 dv = alloc(); 507 dv->Dist = Distance; 508 dv->AvailableDomains = available; 509 dv->Instrs.push_back(mi); 510 511 // Finally set all defs and non-collapsed uses to dv. 512 for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { 513 MachineOperand &mo = mi->getOperand(i); 514 if (!mo.isReg()) continue; 515 int rx = regIndex(mo.getReg()); 516 if (rx < 0) continue; 517 if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { 518 kill(rx); 519 setLiveReg(rx, dv); 520 } 521 } 522} 523 524void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { 525 // Process explicit defs, kill any relevant registers redefined. 526 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { 527 MachineOperand &mo = mi->getOperand(i); 528 if (!mo.isReg()) continue; 529 int rx = regIndex(mo.getReg()); 530 if (rx < 0) continue; 531 kill(rx); 532 } 533} 534 535bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { 536 MF = &mf; 537 TII = MF->getTarget().getInstrInfo(); 538 TRI = MF->getTarget().getRegisterInfo(); 539 LiveRegs = 0; 540 Distance = 0; 541 assert(NumRegs == RC->getNumRegs() && "Bad regclass"); 542 543 // If no relevant registers are used in the function, we can skip it 544 // completely. 545 bool anyregs = false; 546 for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); 547 I != E; ++I) 548 if (MF->getRegInfo().isPhysRegUsed(*I)) { 549 anyregs = true; 550 break; 551 } 552 if (!anyregs) return false; 553 554 // Initialize the AliasMap on the first use. 555 if (AliasMap.empty()) { 556 // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC, 557 // or -1. 558 AliasMap.resize(TRI->getNumRegs(), -1); 559 for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) 560 for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) 561 AliasMap[*AI] = i; 562 } 563 564 MachineBasicBlock *Entry = MF->begin(); 565 ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); 566 SmallVector<MachineBasicBlock*, 16> Loops; 567 for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator 568 MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { 569 MachineBasicBlock *MBB = *MBBI; 570 if (enterBasicBlock(MBB)) 571 Loops.push_back(MBB); 572 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; 573 ++I) 574 visitInstr(I); 575 leaveBasicBlock(MBB); 576 } 577 578 // Visit all the loop blocks again in order to merge DomainValues from 579 // back-edges. 580 for (unsigned i = 0, e = Loops.size(); i != e; ++i) { 581 MachineBasicBlock *MBB = Loops[i]; 582 enterBasicBlock(MBB); 583 leaveBasicBlock(MBB); 584 } 585 586 // Clear the LiveOuts vectors and collapse any remaining DomainValues. 587 for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator 588 MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { 589 LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); 590 if (FI == LiveOuts.end() || !FI->second) 591 continue; 592 for (unsigned i = 0, e = NumRegs; i != e; ++i) 593 if (FI->second[i]) 594 release(FI->second[i]); 595 delete[] FI->second; 596 } 597 LiveOuts.clear(); 598 Avail.clear(); 599 Allocator.DestroyAll(); 600 601 return false; 602} 603 604FunctionPass * 605llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) { 606 return new ExeDepsFix(RC); 607} 608