ExecutionDepsFix.cpp revision dbc372f47e3a77343e6ef1ab4a88bc46f532f774
1//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains the execution dependency fix pass. 11// 12// Some X86 SSE instructions like mov, and, or, xor are available in different 13// variants for different operand types. These variant instructions are 14// equivalent, but on Nehalem and newer cpus there is extra latency 15// transferring data between integer and floating point domains. ARM cores 16// have similar issues when they are configured with both VFP and NEON 17// pipelines. 18// 19// This pass changes the variant instructions to minimize domain crossings. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "execution-fix" 24#include "llvm/CodeGen/MachineFunctionPass.h" 25#include "llvm/CodeGen/MachineRegisterInfo.h" 26#include "llvm/CodeGen/Passes.h" 27#include "llvm/Target/TargetInstrInfo.h" 28#include "llvm/Target/TargetMachine.h" 29#include "llvm/ADT/PostOrderIterator.h" 30#include "llvm/Support/Allocator.h" 31#include "llvm/Support/Debug.h" 32#include "llvm/Support/raw_ostream.h" 33using namespace llvm; 34 35/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track 36/// of execution domains. 37/// 38/// An open DomainValue represents a set of instructions that can still switch 39/// execution domain. Multiple registers may refer to the same open 40/// DomainValue - they will eventually be collapsed to the same execution 41/// domain. 42/// 43/// A collapsed DomainValue represents a single register that has been forced 44/// into one of more execution domains. There is a separate collapsed 45/// DomainValue for each register, but it may contain multiple execution 46/// domains. A register value is initially created in a single execution 47/// domain, but if we were forced to pay the penalty of a domain crossing, we 48/// keep track of the fact the the register is now available in multiple 49/// domains. 50namespace { 51struct DomainValue { 52 // Basic reference counting. 53 unsigned Refs; 54 55 // Bitmask of available domains. For an open DomainValue, it is the still 56 // possible domains for collapsing. For a collapsed DomainValue it is the 57 // domains where the register is available for free. 58 unsigned AvailableDomains; 59 60 // Position of the last defining instruction. 61 unsigned Dist; 62 63 // Pointer to the next DomainValue in a chain. When two DomainValues are 64 // merged, Victim.Next is set to point to Victor, so old DomainValue 65 // references can be updated by folowing the chain. 66 DomainValue *Next; 67 68 // Twiddleable instructions using or defining these registers. 69 SmallVector<MachineInstr*, 8> Instrs; 70 71 // A collapsed DomainValue has no instructions to twiddle - it simply keeps 72 // track of the domains where the registers are already available. 73 bool isCollapsed() const { return Instrs.empty(); } 74 75 // Is domain available? 76 bool hasDomain(unsigned domain) const { 77 return AvailableDomains & (1u << domain); 78 } 79 80 // Mark domain as available. 81 void addDomain(unsigned domain) { 82 AvailableDomains |= 1u << domain; 83 } 84 85 // Restrict to a single domain available. 86 void setSingleDomain(unsigned domain) { 87 AvailableDomains = 1u << domain; 88 } 89 90 // Return bitmask of domains that are available and in mask. 91 unsigned getCommonDomains(unsigned mask) const { 92 return AvailableDomains & mask; 93 } 94 95 // First domain available. 96 unsigned getFirstDomain() const { 97 return CountTrailingZeros_32(AvailableDomains); 98 } 99 100 DomainValue() : Refs(0) { clear(); } 101 102 // Clear this DomainValue and point to next which has all its data. 103 void clear() { 104 AvailableDomains = Dist = 0; 105 Next = 0; 106 Instrs.clear(); 107 } 108}; 109} 110 111namespace { 112class ExeDepsFix : public MachineFunctionPass { 113 static char ID; 114 SpecificBumpPtrAllocator<DomainValue> Allocator; 115 SmallVector<DomainValue*,16> Avail; 116 117 const TargetRegisterClass *const RC; 118 MachineFunction *MF; 119 const TargetInstrInfo *TII; 120 const TargetRegisterInfo *TRI; 121 std::vector<int> AliasMap; 122 const unsigned NumRegs; 123 DomainValue **LiveRegs; 124 typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; 125 LiveOutMap LiveOuts; 126 unsigned Distance; 127 128public: 129 ExeDepsFix(const TargetRegisterClass *rc) 130 : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {} 131 132 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 133 AU.setPreservesAll(); 134 MachineFunctionPass::getAnalysisUsage(AU); 135 } 136 137 virtual bool runOnMachineFunction(MachineFunction &MF); 138 139 virtual const char *getPassName() const { 140 return "Execution dependency fix"; 141 } 142 143private: 144 // Register mapping. 145 int regIndex(unsigned Reg); 146 147 // DomainValue allocation. 148 DomainValue *alloc(int domain = -1); 149 DomainValue *retain(DomainValue *DV) { 150 if (DV) ++DV->Refs; 151 return DV; 152 } 153 void release(DomainValue*); 154 DomainValue *resolve(DomainValue*&); 155 156 // LiveRegs manipulations. 157 void setLiveReg(int rx, DomainValue *DV); 158 void kill(int rx); 159 void force(int rx, unsigned domain); 160 void collapse(DomainValue *dv, unsigned domain); 161 bool merge(DomainValue *A, DomainValue *B); 162 163 void enterBasicBlock(MachineBasicBlock*); 164 void leaveBasicBlock(MachineBasicBlock*); 165 void visitInstr(MachineInstr*); 166 void visitGenericInstr(MachineInstr*); 167 void visitSoftInstr(MachineInstr*, unsigned mask); 168 void visitHardInstr(MachineInstr*, unsigned domain); 169}; 170} 171 172char ExeDepsFix::ID = 0; 173 174/// Translate TRI register number to an index into our smaller tables of 175/// interesting registers. Return -1 for boring registers. 176int ExeDepsFix::regIndex(unsigned Reg) { 177 assert(Reg < AliasMap.size() && "Invalid register"); 178 return AliasMap[Reg]; 179} 180 181DomainValue *ExeDepsFix::alloc(int domain) { 182 DomainValue *dv = Avail.empty() ? 183 new(Allocator.Allocate()) DomainValue : 184 Avail.pop_back_val(); 185 dv->Dist = Distance; 186 if (domain >= 0) 187 dv->addDomain(domain); 188 assert(dv->Refs == 0 && "Reference count wasn't cleared"); 189 assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); 190 return dv; 191} 192 193/// release - Release a reference to DV. When the last reference is released, 194/// collapse if needed. 195void ExeDepsFix::release(DomainValue *DV) { 196 while (DV) { 197 assert(DV->Refs && "Bad DomainValue"); 198 if (--DV->Refs) 199 return; 200 201 // There are no more DV references. Collapse any contained instructions. 202 if (DV->AvailableDomains && !DV->isCollapsed()) 203 collapse(DV, DV->getFirstDomain()); 204 205 DomainValue *Next = DV->Next; 206 DV->clear(); 207 Avail.push_back(DV); 208 // Also release the next DomainValue in the chain. 209 DV = Next; 210 } 211} 212 213/// resolve - Follow the chain of dead DomainValues until a live DomainValue is 214/// reached. Update the referenced pointer when necessary. 215DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { 216 DomainValue *DV = DVRef; 217 if (!DV || !DV->Next) 218 return DV; 219 220 // DV has a chain. Find the end. 221 do DV = DV->Next; 222 while (DV->Next); 223 224 // Update DVRef to point to DV. 225 retain(DV); 226 release(DVRef); 227 DVRef = DV; 228 return DV; 229} 230 231/// Set LiveRegs[rx] = dv, updating reference counts. 232void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { 233 assert(unsigned(rx) < NumRegs && "Invalid index"); 234 if (!LiveRegs) { 235 LiveRegs = new DomainValue*[NumRegs]; 236 std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); 237 } 238 239 if (LiveRegs[rx] == dv) 240 return; 241 if (LiveRegs[rx]) 242 release(LiveRegs[rx]); 243 LiveRegs[rx] = retain(dv); 244} 245 246// Kill register rx, recycle or collapse any DomainValue. 247void ExeDepsFix::kill(int rx) { 248 assert(unsigned(rx) < NumRegs && "Invalid index"); 249 if (!LiveRegs || !LiveRegs[rx]) return; 250 251 release(LiveRegs[rx]); 252 LiveRegs[rx] = 0; 253} 254 255/// Force register rx into domain. 256void ExeDepsFix::force(int rx, unsigned domain) { 257 assert(unsigned(rx) < NumRegs && "Invalid index"); 258 DomainValue *dv; 259 if (LiveRegs && (dv = LiveRegs[rx])) { 260 if (dv->isCollapsed()) 261 dv->addDomain(domain); 262 else if (dv->hasDomain(domain)) 263 collapse(dv, domain); 264 else { 265 // This is an incompatible open DomainValue. Collapse it to whatever and 266 // force the new value into domain. This costs a domain crossing. 267 collapse(dv, dv->getFirstDomain()); 268 assert(LiveRegs[rx] && "Not live after collapse?"); 269 LiveRegs[rx]->addDomain(domain); 270 } 271 } else { 272 // Set up basic collapsed DomainValue. 273 setLiveReg(rx, alloc(domain)); 274 } 275} 276 277/// Collapse open DomainValue into given domain. If there are multiple 278/// registers using dv, they each get a unique collapsed DomainValue. 279void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { 280 assert(dv->hasDomain(domain) && "Cannot collapse"); 281 282 // Collapse all the instructions. 283 while (!dv->Instrs.empty()) 284 TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain); 285 dv->setSingleDomain(domain); 286 287 // If there are multiple users, give them new, unique DomainValues. 288 if (LiveRegs && dv->Refs > 1) 289 for (unsigned rx = 0; rx != NumRegs; ++rx) 290 if (LiveRegs[rx] == dv) 291 setLiveReg(rx, alloc(domain)); 292} 293 294/// Merge - All instructions and registers in B are moved to A, and B is 295/// released. 296bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { 297 assert(!A->isCollapsed() && "Cannot merge into collapsed"); 298 assert(!B->isCollapsed() && "Cannot merge from collapsed"); 299 if (A == B) 300 return true; 301 // Restrict to the domains that A and B have in common. 302 unsigned common = A->getCommonDomains(B->AvailableDomains); 303 if (!common) 304 return false; 305 A->AvailableDomains = common; 306 A->Dist = std::max(A->Dist, B->Dist); 307 A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); 308 309 // Clear the old DomainValue so we won't try to swizzle instructions twice. 310 B->clear(); 311 // All uses of B are referred to A. 312 B->Next = retain(A); 313 314 for (unsigned rx = 0; rx != NumRegs; ++rx) 315 if (LiveRegs[rx] == B) 316 setLiveReg(rx, A); 317 return true; 318} 319 320void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { 321 // Try to coalesce live-out registers from predecessors. 322 for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), 323 e = MBB->livein_end(); i != e; ++i) { 324 int rx = regIndex(*i); 325 if (rx < 0) continue; 326 for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), 327 pe = MBB->pred_end(); pi != pe; ++pi) { 328 LiveOutMap::const_iterator fi = LiveOuts.find(*pi); 329 if (fi == LiveOuts.end()) continue; 330 DomainValue *pdv = resolve(fi->second[rx]); 331 if (!pdv) continue; 332 if (!LiveRegs || !LiveRegs[rx]) { 333 setLiveReg(rx, pdv); 334 continue; 335 } 336 337 // We have a live DomainValue from more than one predecessor. 338 if (LiveRegs[rx]->isCollapsed()) { 339 // We are already collapsed, but predecessor is not. Force him. 340 unsigned domain = LiveRegs[rx]->getFirstDomain(); 341 if (!pdv->isCollapsed() && pdv->hasDomain(domain)) 342 collapse(pdv, domain); 343 continue; 344 } 345 346 // Currently open, merge in predecessor. 347 if (!pdv->isCollapsed()) 348 merge(LiveRegs[rx], pdv); 349 else 350 force(rx, pdv->getFirstDomain()); 351 } 352 } 353} 354 355void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { 356 // Save live registers at end of MBB - used by enterBasicBlock(). 357 if (LiveRegs) 358 LiveOuts.insert(std::make_pair(MBB, LiveRegs)); 359 LiveRegs = 0; 360} 361 362void ExeDepsFix::visitInstr(MachineInstr *MI) { 363 if (MI->isDebugValue()) 364 return; 365 ++Distance; 366 std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(MI); 367 if (domp.first) 368 if (domp.second) 369 visitSoftInstr(MI, domp.second); 370 else 371 visitHardInstr(MI, domp.first); 372 else if (LiveRegs) 373 visitGenericInstr(MI); 374} 375 376// A hard instruction only works in one domain. All input registers will be 377// forced into that domain. 378void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { 379 // Collapse all uses. 380 for (unsigned i = mi->getDesc().getNumDefs(), 381 e = mi->getDesc().getNumOperands(); i != e; ++i) { 382 MachineOperand &mo = mi->getOperand(i); 383 if (!mo.isReg()) continue; 384 int rx = regIndex(mo.getReg()); 385 if (rx < 0) continue; 386 force(rx, domain); 387 } 388 389 // Kill all defs and force them. 390 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { 391 MachineOperand &mo = mi->getOperand(i); 392 if (!mo.isReg()) continue; 393 int rx = regIndex(mo.getReg()); 394 if (rx < 0) continue; 395 kill(rx); 396 force(rx, domain); 397 } 398} 399 400// A soft instruction can be changed to work in other domains given by mask. 401void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { 402 // Bitmask of available domains for this instruction after taking collapsed 403 // operands into account. 404 unsigned available = mask; 405 406 // Scan the explicit use operands for incoming domains. 407 SmallVector<int, 4> used; 408 if (LiveRegs) 409 for (unsigned i = mi->getDesc().getNumDefs(), 410 e = mi->getDesc().getNumOperands(); i != e; ++i) { 411 MachineOperand &mo = mi->getOperand(i); 412 if (!mo.isReg()) continue; 413 int rx = regIndex(mo.getReg()); 414 if (rx < 0) continue; 415 if (DomainValue *dv = LiveRegs[rx]) { 416 // Bitmask of domains that dv and available have in common. 417 unsigned common = dv->getCommonDomains(available); 418 // Is it possible to use this collapsed register for free? 419 if (dv->isCollapsed()) { 420 // Restrict available domains to the ones in common with the operand. 421 // If there are no common domains, we must pay the cross-domain 422 // penalty for this operand. 423 if (common) available = common; 424 } else if (common) 425 // Open DomainValue is compatible, save it for merging. 426 used.push_back(rx); 427 else 428 // Open DomainValue is not compatible with instruction. It is useless 429 // now. 430 kill(rx); 431 } 432 } 433 434 // If the collapsed operands force a single domain, propagate the collapse. 435 if (isPowerOf2_32(available)) { 436 unsigned domain = CountTrailingZeros_32(available); 437 TII->setExecutionDomain(mi, domain); 438 visitHardInstr(mi, domain); 439 return; 440 } 441 442 // Kill off any remaining uses that don't match available, and build a list of 443 // incoming DomainValues that we want to merge. 444 SmallVector<DomainValue*,4> doms; 445 for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { 446 int rx = *i; 447 DomainValue *dv = LiveRegs[rx]; 448 // This useless DomainValue could have been missed above. 449 if (!dv->getCommonDomains(available)) { 450 kill(*i); 451 continue; 452 } 453 // sorted, uniqued insert. 454 bool inserted = false; 455 for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end(); 456 i != e && !inserted; ++i) { 457 if (dv == *i) 458 inserted = true; 459 else if (dv->Dist < (*i)->Dist) { 460 inserted = true; 461 doms.insert(i, dv); 462 } 463 } 464 if (!inserted) 465 doms.push_back(dv); 466 } 467 468 // doms are now sorted in order of appearance. Try to merge them all, giving 469 // priority to the latest ones. 470 DomainValue *dv = 0; 471 while (!doms.empty()) { 472 if (!dv) { 473 dv = doms.pop_back_val(); 474 continue; 475 } 476 477 DomainValue *latest = doms.pop_back_val(); 478 if (merge(dv, latest)) continue; 479 480 // If latest didn't merge, it is useless now. Kill all registers using it. 481 for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) 482 if (LiveRegs[*i] == latest) 483 kill(*i); 484 } 485 486 // dv is the DomainValue we are going to use for this instruction. 487 if (!dv) 488 dv = alloc(); 489 dv->Dist = Distance; 490 dv->AvailableDomains = available; 491 dv->Instrs.push_back(mi); 492 493 // Finally set all defs and non-collapsed uses to dv. 494 for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { 495 MachineOperand &mo = mi->getOperand(i); 496 if (!mo.isReg()) continue; 497 int rx = regIndex(mo.getReg()); 498 if (rx < 0) continue; 499 if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { 500 kill(rx); 501 setLiveReg(rx, dv); 502 } 503 } 504} 505 506void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { 507 // Process explicit defs, kill any relevant registers redefined. 508 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { 509 MachineOperand &mo = mi->getOperand(i); 510 if (!mo.isReg()) continue; 511 int rx = regIndex(mo.getReg()); 512 if (rx < 0) continue; 513 kill(rx); 514 } 515} 516 517bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { 518 MF = &mf; 519 TII = MF->getTarget().getInstrInfo(); 520 TRI = MF->getTarget().getRegisterInfo(); 521 LiveRegs = 0; 522 Distance = 0; 523 assert(NumRegs == RC->getNumRegs() && "Bad regclass"); 524 525 // If no relevant registers are used in the function, we can skip it 526 // completely. 527 bool anyregs = false; 528 for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); 529 I != E; ++I) 530 if (MF->getRegInfo().isPhysRegUsed(*I)) { 531 anyregs = true; 532 break; 533 } 534 if (!anyregs) return false; 535 536 // Initialize the AliasMap on the first use. 537 if (AliasMap.empty()) { 538 // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC, 539 // or -1. 540 AliasMap.resize(TRI->getNumRegs(), -1); 541 for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) 542 for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) 543 AliasMap[*AI] = i; 544 } 545 546 MachineBasicBlock *Entry = MF->begin(); 547 ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); 548 for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator 549 MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { 550 MachineBasicBlock *MBB = *MBBI; 551 enterBasicBlock(MBB); 552 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; 553 ++I) 554 visitInstr(I); 555 leaveBasicBlock(MBB); 556 } 557 558 // Clear the LiveOuts vectors and collapse any remaining DomainValues. 559 for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator 560 MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { 561 LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); 562 if (FI == LiveOuts.end()) 563 continue; 564 assert(FI->second && "Null entry"); 565 for (unsigned i = 0, e = NumRegs; i != e; ++i) 566 if (FI->second[i]) 567 release(FI->second[i]); 568 delete[] FI->second; 569 } 570 LiveOuts.clear(); 571 Avail.clear(); 572 Allocator.DestroyAll(); 573 574 return false; 575} 576 577FunctionPass * 578llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) { 579 return new ExeDepsFix(RC); 580} 581