ExecutionDepsFix.cpp revision 0ed5f64f1ddf838857a4ce95353906c08b6fafc6
1//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains the execution dependency fix pass. 11// 12// Some X86 SSE instructions like mov, and, or, xor are available in different 13// variants for different operand types. These variant instructions are 14// equivalent, but on Nehalem and newer cpus there is extra latency 15// transferring data between integer and floating point domains. ARM cores 16// have similar issues when they are configured with both VFP and NEON 17// pipelines. 18// 19// This pass changes the variant instructions to minimize domain crossings. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "execution-fix" 24#include "llvm/CodeGen/MachineFunctionPass.h" 25#include "llvm/CodeGen/MachineRegisterInfo.h" 26#include "llvm/CodeGen/Passes.h" 27#include "llvm/Target/TargetInstrInfo.h" 28#include "llvm/Target/TargetMachine.h" 29#include "llvm/ADT/PostOrderIterator.h" 30#include "llvm/Support/Allocator.h" 31#include "llvm/Support/Debug.h" 32#include "llvm/Support/raw_ostream.h" 33using namespace llvm; 34 35/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track 36/// of execution domains. 37/// 38/// An open DomainValue represents a set of instructions that can still switch 39/// execution domain. Multiple registers may refer to the same open 40/// DomainValue - they will eventually be collapsed to the same execution 41/// domain. 42/// 43/// A collapsed DomainValue represents a single register that has been forced 44/// into one of more execution domains. There is a separate collapsed 45/// DomainValue for each register, but it may contain multiple execution 46/// domains. A register value is initially created in a single execution 47/// domain, but if we were forced to pay the penalty of a domain crossing, we 48/// keep track of the fact the the register is now available in multiple 49/// domains. 50namespace { 51struct DomainValue { 52 // Basic reference counting. 53 unsigned Refs; 54 55 // Bitmask of available domains. For an open DomainValue, it is the still 56 // possible domains for collapsing. For a collapsed DomainValue it is the 57 // domains where the register is available for free. 58 unsigned AvailableDomains; 59 60 // Position of the last defining instruction. 61 unsigned Dist; 62 63 // Twiddleable instructions using or defining these registers. 64 SmallVector<MachineInstr*, 8> Instrs; 65 66 // A collapsed DomainValue has no instructions to twiddle - it simply keeps 67 // track of the domains where the registers are already available. 68 bool isCollapsed() const { return Instrs.empty(); } 69 70 // Is domain available? 71 bool hasDomain(unsigned domain) const { 72 return AvailableDomains & (1u << domain); 73 } 74 75 // Mark domain as available. 76 void addDomain(unsigned domain) { 77 AvailableDomains |= 1u << domain; 78 } 79 80 // Restrict to a single domain available. 81 void setSingleDomain(unsigned domain) { 82 AvailableDomains = 1u << domain; 83 } 84 85 // Return bitmask of domains that are available and in mask. 86 unsigned getCommonDomains(unsigned mask) const { 87 return AvailableDomains & mask; 88 } 89 90 // First domain available. 91 unsigned getFirstDomain() const { 92 return CountTrailingZeros_32(AvailableDomains); 93 } 94 95 DomainValue() { clear(); } 96 97 void clear() { 98 Refs = AvailableDomains = Dist = 0; 99 Instrs.clear(); 100 } 101}; 102} 103 104namespace { 105class ExeDepsFix : public MachineFunctionPass { 106 static char ID; 107 SpecificBumpPtrAllocator<DomainValue> Allocator; 108 SmallVector<DomainValue*,16> Avail; 109 110 const TargetRegisterClass *const RC; 111 MachineFunction *MF; 112 const TargetInstrInfo *TII; 113 const TargetRegisterInfo *TRI; 114 std::vector<int> AliasMap; 115 const unsigned NumRegs; 116 DomainValue **LiveRegs; 117 typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; 118 LiveOutMap LiveOuts; 119 unsigned Distance; 120 121public: 122 ExeDepsFix(const TargetRegisterClass *rc) 123 : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {} 124 125 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 126 AU.setPreservesAll(); 127 MachineFunctionPass::getAnalysisUsage(AU); 128 } 129 130 virtual bool runOnMachineFunction(MachineFunction &MF); 131 132 virtual const char *getPassName() const { 133 return "Execution dependency fix"; 134 } 135 136private: 137 // Register mapping. 138 int regIndex(unsigned Reg); 139 140 // DomainValue allocation. 141 DomainValue *alloc(int domain = -1); 142 void release(DomainValue*); 143 144 // LiveRegs manipulations. 145 void setLiveReg(int rx, DomainValue *DV); 146 void kill(int rx); 147 void force(int rx, unsigned domain); 148 void collapse(DomainValue *dv, unsigned domain); 149 bool merge(DomainValue *A, DomainValue *B); 150 151 void enterBasicBlock(MachineBasicBlock*); 152 void leaveBasicBlock(MachineBasicBlock*); 153 void visitInstr(MachineInstr*); 154 void visitGenericInstr(MachineInstr*); 155 void visitSoftInstr(MachineInstr*, unsigned mask); 156 void visitHardInstr(MachineInstr*, unsigned domain); 157}; 158} 159 160char ExeDepsFix::ID = 0; 161 162/// Translate TRI register number to an index into our smaller tables of 163/// interesting registers. Return -1 for boring registers. 164int ExeDepsFix::regIndex(unsigned Reg) { 165 assert(Reg < AliasMap.size() && "Invalid register"); 166 return AliasMap[Reg]; 167} 168 169DomainValue *ExeDepsFix::alloc(int domain) { 170 DomainValue *dv = Avail.empty() ? 171 new(Allocator.Allocate()) DomainValue : 172 Avail.pop_back_val(); 173 dv->Dist = Distance; 174 if (domain >= 0) 175 dv->addDomain(domain); 176 return dv; 177} 178 179/// release - Release a reference to DV. When the last reference is released, 180/// collapse if needed. 181void ExeDepsFix::release(DomainValue *DV) { 182 assert(DV && DV->Refs && "Bad DomainValue"); 183 if (--DV->Refs) 184 return; 185 186 // There are no more DV references. Collapse any contained instructions. 187 if (DV->AvailableDomains && !DV->isCollapsed()) 188 collapse(DV, DV->getFirstDomain()); 189 190 DV->clear(); 191 Avail.push_back(DV); 192} 193 194/// Set LiveRegs[rx] = dv, updating reference counts. 195void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { 196 assert(unsigned(rx) < NumRegs && "Invalid index"); 197 if (!LiveRegs) { 198 LiveRegs = new DomainValue*[NumRegs]; 199 std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); 200 } 201 202 if (LiveRegs[rx] == dv) 203 return; 204 if (LiveRegs[rx]) 205 release(LiveRegs[rx]); 206 LiveRegs[rx] = dv; 207 if (dv) ++dv->Refs; 208} 209 210// Kill register rx, recycle or collapse any DomainValue. 211void ExeDepsFix::kill(int rx) { 212 assert(unsigned(rx) < NumRegs && "Invalid index"); 213 if (!LiveRegs || !LiveRegs[rx]) return; 214 215 release(LiveRegs[rx]); 216 LiveRegs[rx] = 0; 217} 218 219/// Force register rx into domain. 220void ExeDepsFix::force(int rx, unsigned domain) { 221 assert(unsigned(rx) < NumRegs && "Invalid index"); 222 DomainValue *dv; 223 if (LiveRegs && (dv = LiveRegs[rx])) { 224 if (dv->isCollapsed()) 225 dv->addDomain(domain); 226 else if (dv->hasDomain(domain)) 227 collapse(dv, domain); 228 else { 229 // This is an incompatible open DomainValue. Collapse it to whatever and 230 // force the new value into domain. This costs a domain crossing. 231 collapse(dv, dv->getFirstDomain()); 232 assert(LiveRegs[rx] && "Not live after collapse?"); 233 LiveRegs[rx]->addDomain(domain); 234 } 235 } else { 236 // Set up basic collapsed DomainValue. 237 setLiveReg(rx, alloc(domain)); 238 } 239} 240 241/// Collapse open DomainValue into given domain. If there are multiple 242/// registers using dv, they each get a unique collapsed DomainValue. 243void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { 244 assert(dv->hasDomain(domain) && "Cannot collapse"); 245 246 // Collapse all the instructions. 247 while (!dv->Instrs.empty()) 248 TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain); 249 dv->setSingleDomain(domain); 250 251 // If there are multiple users, give them new, unique DomainValues. 252 if (LiveRegs && dv->Refs > 1) 253 for (unsigned rx = 0; rx != NumRegs; ++rx) 254 if (LiveRegs[rx] == dv) 255 setLiveReg(rx, alloc(domain)); 256} 257 258/// Merge - All instructions and registers in B are moved to A, and B is 259/// released. 260bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { 261 assert(!A->isCollapsed() && "Cannot merge into collapsed"); 262 assert(!B->isCollapsed() && "Cannot merge from collapsed"); 263 if (A == B) 264 return true; 265 // Restrict to the domains that A and B have in common. 266 unsigned common = A->getCommonDomains(B->AvailableDomains); 267 if (!common) 268 return false; 269 A->AvailableDomains = common; 270 A->Dist = std::max(A->Dist, B->Dist); 271 A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); 272 273 // Clear the old DomainValue so we won't try to swizzle instructions twice. 274 B->Instrs.clear(); 275 B->AvailableDomains = 0; 276 277 for (unsigned rx = 0; rx != NumRegs; ++rx) 278 if (LiveRegs[rx] == B) 279 setLiveReg(rx, A); 280 return true; 281} 282 283void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { 284 // Try to coalesce live-out registers from predecessors. 285 for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), 286 e = MBB->livein_end(); i != e; ++i) { 287 int rx = regIndex(*i); 288 if (rx < 0) continue; 289 for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), 290 pe = MBB->pred_end(); pi != pe; ++pi) { 291 LiveOutMap::const_iterator fi = LiveOuts.find(*pi); 292 if (fi == LiveOuts.end()) continue; 293 DomainValue *pdv = fi->second[rx]; 294 if (!pdv || !pdv->AvailableDomains) continue; 295 if (!LiveRegs || !LiveRegs[rx]) { 296 setLiveReg(rx, pdv); 297 continue; 298 } 299 300 // We have a live DomainValue from more than one predecessor. 301 if (LiveRegs[rx]->isCollapsed()) { 302 // We are already collapsed, but predecessor is not. Force him. 303 unsigned domain = LiveRegs[rx]->getFirstDomain(); 304 if (!pdv->isCollapsed() && pdv->hasDomain(domain)) 305 collapse(pdv, domain); 306 continue; 307 } 308 309 // Currently open, merge in predecessor. 310 if (!pdv->isCollapsed()) 311 merge(LiveRegs[rx], pdv); 312 else 313 force(rx, pdv->getFirstDomain()); 314 } 315 } 316} 317 318void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { 319 // Save live registers at end of MBB - used by enterBasicBlock(). 320 if (LiveRegs) 321 LiveOuts.insert(std::make_pair(MBB, LiveRegs)); 322 LiveRegs = 0; 323} 324 325void ExeDepsFix::visitInstr(MachineInstr *MI) { 326 if (MI->isDebugValue()) 327 return; 328 ++Distance; 329 std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(MI); 330 if (domp.first) 331 if (domp.second) 332 visitSoftInstr(MI, domp.second); 333 else 334 visitHardInstr(MI, domp.first); 335 else if (LiveRegs) 336 visitGenericInstr(MI); 337} 338 339// A hard instruction only works in one domain. All input registers will be 340// forced into that domain. 341void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { 342 // Collapse all uses. 343 for (unsigned i = mi->getDesc().getNumDefs(), 344 e = mi->getDesc().getNumOperands(); i != e; ++i) { 345 MachineOperand &mo = mi->getOperand(i); 346 if (!mo.isReg()) continue; 347 int rx = regIndex(mo.getReg()); 348 if (rx < 0) continue; 349 force(rx, domain); 350 } 351 352 // Kill all defs and force them. 353 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { 354 MachineOperand &mo = mi->getOperand(i); 355 if (!mo.isReg()) continue; 356 int rx = regIndex(mo.getReg()); 357 if (rx < 0) continue; 358 kill(rx); 359 force(rx, domain); 360 } 361} 362 363// A soft instruction can be changed to work in other domains given by mask. 364void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { 365 // Bitmask of available domains for this instruction after taking collapsed 366 // operands into account. 367 unsigned available = mask; 368 369 // Scan the explicit use operands for incoming domains. 370 SmallVector<int, 4> used; 371 if (LiveRegs) 372 for (unsigned i = mi->getDesc().getNumDefs(), 373 e = mi->getDesc().getNumOperands(); i != e; ++i) { 374 MachineOperand &mo = mi->getOperand(i); 375 if (!mo.isReg()) continue; 376 int rx = regIndex(mo.getReg()); 377 if (rx < 0) continue; 378 if (DomainValue *dv = LiveRegs[rx]) { 379 // Bitmask of domains that dv and available have in common. 380 unsigned common = dv->getCommonDomains(available); 381 // Is it possible to use this collapsed register for free? 382 if (dv->isCollapsed()) { 383 // Restrict available domains to the ones in common with the operand. 384 // If there are no common domains, we must pay the cross-domain 385 // penalty for this operand. 386 if (common) available = common; 387 } else if (common) 388 // Open DomainValue is compatible, save it for merging. 389 used.push_back(rx); 390 else 391 // Open DomainValue is not compatible with instruction. It is useless 392 // now. 393 kill(rx); 394 } 395 } 396 397 // If the collapsed operands force a single domain, propagate the collapse. 398 if (isPowerOf2_32(available)) { 399 unsigned domain = CountTrailingZeros_32(available); 400 TII->setExecutionDomain(mi, domain); 401 visitHardInstr(mi, domain); 402 return; 403 } 404 405 // Kill off any remaining uses that don't match available, and build a list of 406 // incoming DomainValues that we want to merge. 407 SmallVector<DomainValue*,4> doms; 408 for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { 409 int rx = *i; 410 DomainValue *dv = LiveRegs[rx]; 411 // This useless DomainValue could have been missed above. 412 if (!dv->getCommonDomains(available)) { 413 kill(*i); 414 continue; 415 } 416 // sorted, uniqued insert. 417 bool inserted = false; 418 for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end(); 419 i != e && !inserted; ++i) { 420 if (dv == *i) 421 inserted = true; 422 else if (dv->Dist < (*i)->Dist) { 423 inserted = true; 424 doms.insert(i, dv); 425 } 426 } 427 if (!inserted) 428 doms.push_back(dv); 429 } 430 431 // doms are now sorted in order of appearance. Try to merge them all, giving 432 // priority to the latest ones. 433 DomainValue *dv = 0; 434 while (!doms.empty()) { 435 if (!dv) { 436 dv = doms.pop_back_val(); 437 continue; 438 } 439 440 DomainValue *latest = doms.pop_back_val(); 441 if (merge(dv, latest)) continue; 442 443 // If latest didn't merge, it is useless now. Kill all registers using it. 444 for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) 445 if (LiveRegs[*i] == latest) 446 kill(*i); 447 } 448 449 // dv is the DomainValue we are going to use for this instruction. 450 if (!dv) 451 dv = alloc(); 452 dv->Dist = Distance; 453 dv->AvailableDomains = available; 454 dv->Instrs.push_back(mi); 455 456 // Finally set all defs and non-collapsed uses to dv. 457 for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { 458 MachineOperand &mo = mi->getOperand(i); 459 if (!mo.isReg()) continue; 460 int rx = regIndex(mo.getReg()); 461 if (rx < 0) continue; 462 if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { 463 kill(rx); 464 setLiveReg(rx, dv); 465 } 466 } 467} 468 469void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { 470 // Process explicit defs, kill any relevant registers redefined. 471 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { 472 MachineOperand &mo = mi->getOperand(i); 473 if (!mo.isReg()) continue; 474 int rx = regIndex(mo.getReg()); 475 if (rx < 0) continue; 476 kill(rx); 477 } 478} 479 480bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { 481 MF = &mf; 482 TII = MF->getTarget().getInstrInfo(); 483 TRI = MF->getTarget().getRegisterInfo(); 484 LiveRegs = 0; 485 Distance = 0; 486 assert(NumRegs == RC->getNumRegs() && "Bad regclass"); 487 488 // If no relevant registers are used in the function, we can skip it 489 // completely. 490 bool anyregs = false; 491 for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); 492 I != E; ++I) 493 if (MF->getRegInfo().isPhysRegUsed(*I)) { 494 anyregs = true; 495 break; 496 } 497 if (!anyregs) return false; 498 499 // Initialize the AliasMap on the first use. 500 if (AliasMap.empty()) { 501 // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC, 502 // or -1. 503 AliasMap.resize(TRI->getNumRegs(), -1); 504 for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) 505 for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) 506 AliasMap[*AI] = i; 507 } 508 509 MachineBasicBlock *Entry = MF->begin(); 510 ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); 511 for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator 512 MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { 513 MachineBasicBlock *MBB = *MBBI; 514 enterBasicBlock(MBB); 515 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; 516 ++I) 517 visitInstr(I); 518 leaveBasicBlock(MBB); 519 } 520 521 // Clear the LiveOuts vectors and collapse any remaining DomainValues. 522 for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator 523 MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { 524 LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); 525 if (FI == LiveOuts.end()) 526 continue; 527 assert(FI->second && "Null entry"); 528 for (unsigned i = 0, e = NumRegs; i != e; ++i) 529 if (FI->second[i]) 530 release(FI->second[i]); 531 delete[] FI->second; 532 } 533 LiveOuts.clear(); 534 Avail.clear(); 535 Allocator.DestroyAll(); 536 537 return false; 538} 539 540FunctionPass * 541llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) { 542 return new ExeDepsFix(RC); 543} 544