ExecutionDepsFix.cpp revision fc7b08da019fcb14f7ca3f0db77b10384809fd8b
1//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the execution dependency fix pass.
11//
12// Some X86 SSE instructions like mov, and, or, xor are available in different
13// variants for different operand types. These variant instructions are
14// equivalent, but on Nehalem and newer cpus there is extra latency
15// transferring data between integer and floating point domains.  ARM cores
16// have similar issues when they are configured with both VFP and NEON
17// pipelines.
18//
19// This pass changes the variant instructions to minimize domain crossings.
20//
21//===----------------------------------------------------------------------===//
22
23#define DEBUG_TYPE "execution-fix"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/Passes.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetMachine.h"
29#include "llvm/ADT/PostOrderIterator.h"
30#include "llvm/Support/Allocator.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/raw_ostream.h"
33using namespace llvm;
34
35/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
36/// of execution domains.
37///
38/// An open DomainValue represents a set of instructions that can still switch
39/// execution domain. Multiple registers may refer to the same open
40/// DomainValue - they will eventually be collapsed to the same execution
41/// domain.
42///
43/// A collapsed DomainValue represents a single register that has been forced
44/// into one of more execution domains. There is a separate collapsed
45/// DomainValue for each register, but it may contain multiple execution
46/// domains. A register value is initially created in a single execution
47/// domain, but if we were forced to pay the penalty of a domain crossing, we
48/// keep track of the fact the the register is now available in multiple
49/// domains.
50namespace {
51struct DomainValue {
52  // Basic reference counting.
53  unsigned Refs;
54
55  // Bitmask of available domains. For an open DomainValue, it is the still
56  // possible domains for collapsing. For a collapsed DomainValue it is the
57  // domains where the register is available for free.
58  unsigned AvailableDomains;
59
60  // Position of the last defining instruction.
61  unsigned Dist;
62
63  // Pointer to the next DomainValue in a chain.  When two DomainValues are
64  // merged, Victim.Next is set to point to Victor, so old DomainValue
65  // references can be updated by folowing the chain.
66  DomainValue *Next;
67
68  // Twiddleable instructions using or defining these registers.
69  SmallVector<MachineInstr*, 8> Instrs;
70
71  // A collapsed DomainValue has no instructions to twiddle - it simply keeps
72  // track of the domains where the registers are already available.
73  bool isCollapsed() const { return Instrs.empty(); }
74
75  // Is domain available?
76  bool hasDomain(unsigned domain) const {
77    return AvailableDomains & (1u << domain);
78  }
79
80  // Mark domain as available.
81  void addDomain(unsigned domain) {
82    AvailableDomains |= 1u << domain;
83  }
84
85  // Restrict to a single domain available.
86  void setSingleDomain(unsigned domain) {
87    AvailableDomains = 1u << domain;
88  }
89
90  // Return bitmask of domains that are available and in mask.
91  unsigned getCommonDomains(unsigned mask) const {
92    return AvailableDomains & mask;
93  }
94
95  // First domain available.
96  unsigned getFirstDomain() const {
97    return CountTrailingZeros_32(AvailableDomains);
98  }
99
100  DomainValue() : Refs(0) { clear(); }
101
102  // Clear this DomainValue and point to next which has all its data.
103  void clear() {
104    AvailableDomains = Dist = 0;
105    Next = 0;
106    Instrs.clear();
107  }
108};
109}
110
111namespace {
112class ExeDepsFix : public MachineFunctionPass {
113  static char ID;
114  SpecificBumpPtrAllocator<DomainValue> Allocator;
115  SmallVector<DomainValue*,16> Avail;
116
117  const TargetRegisterClass *const RC;
118  MachineFunction *MF;
119  const TargetInstrInfo *TII;
120  const TargetRegisterInfo *TRI;
121  std::vector<int> AliasMap;
122  const unsigned NumRegs;
123  DomainValue **LiveRegs;
124  typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
125  LiveOutMap LiveOuts;
126  unsigned Distance;
127
128public:
129  ExeDepsFix(const TargetRegisterClass *rc)
130    : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
131
132  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
133    AU.setPreservesAll();
134    MachineFunctionPass::getAnalysisUsage(AU);
135  }
136
137  virtual bool runOnMachineFunction(MachineFunction &MF);
138
139  virtual const char *getPassName() const {
140    return "Execution dependency fix";
141  }
142
143private:
144  // Register mapping.
145  int regIndex(unsigned Reg);
146
147  // DomainValue allocation.
148  DomainValue *alloc(int domain = -1);
149  DomainValue *retain(DomainValue *DV) {
150    if (DV) ++DV->Refs;
151    return DV;
152  }
153  void release(DomainValue*);
154  DomainValue *resolve(DomainValue*&);
155
156  // LiveRegs manipulations.
157  void setLiveReg(int rx, DomainValue *DV);
158  void kill(int rx);
159  void force(int rx, unsigned domain);
160  void collapse(DomainValue *dv, unsigned domain);
161  bool merge(DomainValue *A, DomainValue *B);
162
163  bool enterBasicBlock(MachineBasicBlock*);
164  void leaveBasicBlock(MachineBasicBlock*);
165  void visitInstr(MachineInstr*);
166  void visitGenericInstr(MachineInstr*);
167  void visitSoftInstr(MachineInstr*, unsigned mask);
168  void visitHardInstr(MachineInstr*, unsigned domain);
169};
170}
171
172char ExeDepsFix::ID = 0;
173
174/// Translate TRI register number to an index into our smaller tables of
175/// interesting registers. Return -1 for boring registers.
176int ExeDepsFix::regIndex(unsigned Reg) {
177  assert(Reg < AliasMap.size() && "Invalid register");
178  return AliasMap[Reg];
179}
180
181DomainValue *ExeDepsFix::alloc(int domain) {
182  DomainValue *dv = Avail.empty() ?
183                      new(Allocator.Allocate()) DomainValue :
184                      Avail.pop_back_val();
185  dv->Dist = Distance;
186  if (domain >= 0)
187    dv->addDomain(domain);
188  assert(dv->Refs == 0 && "Reference count wasn't cleared");
189  assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
190  return dv;
191}
192
193/// release - Release a reference to DV.  When the last reference is released,
194/// collapse if needed.
195void ExeDepsFix::release(DomainValue *DV) {
196  while (DV) {
197    assert(DV->Refs && "Bad DomainValue");
198    if (--DV->Refs)
199      return;
200
201    // There are no more DV references. Collapse any contained instructions.
202    if (DV->AvailableDomains && !DV->isCollapsed())
203      collapse(DV, DV->getFirstDomain());
204
205    DomainValue *Next = DV->Next;
206    DV->clear();
207    Avail.push_back(DV);
208    // Also release the next DomainValue in the chain.
209    DV = Next;
210  }
211}
212
213/// resolve - Follow the chain of dead DomainValues until a live DomainValue is
214/// reached.  Update the referenced pointer when necessary.
215DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
216  DomainValue *DV = DVRef;
217  if (!DV || !DV->Next)
218    return DV;
219
220  // DV has a chain. Find the end.
221  do DV = DV->Next;
222  while (DV->Next);
223
224  // Update DVRef to point to DV.
225  retain(DV);
226  release(DVRef);
227  DVRef = DV;
228  return DV;
229}
230
231/// Set LiveRegs[rx] = dv, updating reference counts.
232void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
233  assert(unsigned(rx) < NumRegs && "Invalid index");
234  if (!LiveRegs) {
235    LiveRegs = new DomainValue*[NumRegs];
236    std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0);
237  }
238
239  if (LiveRegs[rx] == dv)
240    return;
241  if (LiveRegs[rx])
242    release(LiveRegs[rx]);
243  LiveRegs[rx] = retain(dv);
244}
245
246// Kill register rx, recycle or collapse any DomainValue.
247void ExeDepsFix::kill(int rx) {
248  assert(unsigned(rx) < NumRegs && "Invalid index");
249  if (!LiveRegs || !LiveRegs[rx]) return;
250
251  release(LiveRegs[rx]);
252  LiveRegs[rx] = 0;
253}
254
255/// Force register rx into domain.
256void ExeDepsFix::force(int rx, unsigned domain) {
257  assert(unsigned(rx) < NumRegs && "Invalid index");
258  DomainValue *dv;
259  if (LiveRegs && (dv = LiveRegs[rx])) {
260    if (dv->isCollapsed())
261      dv->addDomain(domain);
262    else if (dv->hasDomain(domain))
263      collapse(dv, domain);
264    else {
265      // This is an incompatible open DomainValue. Collapse it to whatever and
266      // force the new value into domain. This costs a domain crossing.
267      collapse(dv, dv->getFirstDomain());
268      assert(LiveRegs[rx] && "Not live after collapse?");
269      LiveRegs[rx]->addDomain(domain);
270    }
271  } else {
272    // Set up basic collapsed DomainValue.
273    setLiveReg(rx, alloc(domain));
274  }
275}
276
277/// Collapse open DomainValue into given domain. If there are multiple
278/// registers using dv, they each get a unique collapsed DomainValue.
279void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
280  assert(dv->hasDomain(domain) && "Cannot collapse");
281
282  // Collapse all the instructions.
283  while (!dv->Instrs.empty())
284    TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain);
285  dv->setSingleDomain(domain);
286
287  // If there are multiple users, give them new, unique DomainValues.
288  if (LiveRegs && dv->Refs > 1)
289    for (unsigned rx = 0; rx != NumRegs; ++rx)
290      if (LiveRegs[rx] == dv)
291        setLiveReg(rx, alloc(domain));
292}
293
294/// Merge - All instructions and registers in B are moved to A, and B is
295/// released.
296bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
297  assert(!A->isCollapsed() && "Cannot merge into collapsed");
298  assert(!B->isCollapsed() && "Cannot merge from collapsed");
299  if (A == B)
300    return true;
301  // Restrict to the domains that A and B have in common.
302  unsigned common = A->getCommonDomains(B->AvailableDomains);
303  if (!common)
304    return false;
305  A->AvailableDomains = common;
306  A->Dist = std::max(A->Dist, B->Dist);
307  A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
308
309  // Clear the old DomainValue so we won't try to swizzle instructions twice.
310  B->clear();
311  // All uses of B are referred to A.
312  B->Next = retain(A);
313
314  for (unsigned rx = 0; rx != NumRegs; ++rx)
315    if (LiveRegs[rx] == B)
316      setLiveReg(rx, A);
317  return true;
318}
319
320// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
321// Return true if some predecessor hasn't been processed yet (like on a loop
322// back-edge).
323bool ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
324  // Detect back-edges from predecessors we haven't processed yet.
325  bool seenBackEdge = false;
326
327  // Try to coalesce live-out registers from predecessors.
328  for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
329         e = MBB->livein_end(); i != e; ++i) {
330    int rx = regIndex(*i);
331    if (rx < 0) continue;
332    for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
333           pe = MBB->pred_end(); pi != pe; ++pi) {
334      LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
335      if (fi == LiveOuts.end()) {
336        seenBackEdge = true;
337        continue;
338      }
339      if (!fi->second)
340        continue;
341      DomainValue *pdv = resolve(fi->second[rx]);
342      if (!pdv) continue;
343      if (!LiveRegs || !LiveRegs[rx]) {
344        setLiveReg(rx, pdv);
345        continue;
346      }
347
348      // We have a live DomainValue from more than one predecessor.
349      if (LiveRegs[rx]->isCollapsed()) {
350        // We are already collapsed, but predecessor is not. Force him.
351        unsigned domain = LiveRegs[rx]->getFirstDomain();
352        if (!pdv->isCollapsed() && pdv->hasDomain(domain))
353          collapse(pdv, domain);
354        continue;
355      }
356
357      // Currently open, merge in predecessor.
358      if (!pdv->isCollapsed())
359        merge(LiveRegs[rx], pdv);
360      else
361        force(rx, pdv->getFirstDomain());
362    }
363  }
364  return seenBackEdge;
365}
366
367void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
368  // Save live registers at end of MBB - used by enterBasicBlock().
369  // Also use LiveOuts as a visited set to detect back-edges.
370  if (!LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second && LiveRegs) {
371    // Insertion failed, this must be the second pass.
372    // Release all the DomainValues instead of keeping them.
373    for (unsigned i = 0, e = NumRegs; i != e; ++i)
374      release(LiveRegs[i]);
375    delete[] LiveRegs;
376  }
377  LiveRegs = 0;
378}
379
380void ExeDepsFix::visitInstr(MachineInstr *MI) {
381  if (MI->isDebugValue())
382    return;
383  ++Distance;
384  std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(MI);
385  if (domp.first)
386    if (domp.second)
387      visitSoftInstr(MI, domp.second);
388    else
389      visitHardInstr(MI, domp.first);
390  else if (LiveRegs)
391    visitGenericInstr(MI);
392}
393
394// A hard instruction only works in one domain. All input registers will be
395// forced into that domain.
396void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
397  // Collapse all uses.
398  for (unsigned i = mi->getDesc().getNumDefs(),
399                e = mi->getDesc().getNumOperands(); i != e; ++i) {
400    MachineOperand &mo = mi->getOperand(i);
401    if (!mo.isReg()) continue;
402    int rx = regIndex(mo.getReg());
403    if (rx < 0) continue;
404    force(rx, domain);
405  }
406
407  // Kill all defs and force them.
408  for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
409    MachineOperand &mo = mi->getOperand(i);
410    if (!mo.isReg()) continue;
411    int rx = regIndex(mo.getReg());
412    if (rx < 0) continue;
413    kill(rx);
414    force(rx, domain);
415  }
416}
417
418// A soft instruction can be changed to work in other domains given by mask.
419void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
420  // Bitmask of available domains for this instruction after taking collapsed
421  // operands into account.
422  unsigned available = mask;
423
424  // Scan the explicit use operands for incoming domains.
425  SmallVector<int, 4> used;
426  if (LiveRegs)
427    for (unsigned i = mi->getDesc().getNumDefs(),
428                  e = mi->getDesc().getNumOperands(); i != e; ++i) {
429      MachineOperand &mo = mi->getOperand(i);
430      if (!mo.isReg()) continue;
431      int rx = regIndex(mo.getReg());
432      if (rx < 0) continue;
433      if (DomainValue *dv = LiveRegs[rx]) {
434        // Bitmask of domains that dv and available have in common.
435        unsigned common = dv->getCommonDomains(available);
436        // Is it possible to use this collapsed register for free?
437        if (dv->isCollapsed()) {
438          // Restrict available domains to the ones in common with the operand.
439          // If there are no common domains, we must pay the cross-domain
440          // penalty for this operand.
441          if (common) available = common;
442        } else if (common)
443          // Open DomainValue is compatible, save it for merging.
444          used.push_back(rx);
445        else
446          // Open DomainValue is not compatible with instruction. It is useless
447          // now.
448          kill(rx);
449      }
450    }
451
452  // If the collapsed operands force a single domain, propagate the collapse.
453  if (isPowerOf2_32(available)) {
454    unsigned domain = CountTrailingZeros_32(available);
455    TII->setExecutionDomain(mi, domain);
456    visitHardInstr(mi, domain);
457    return;
458  }
459
460  // Kill off any remaining uses that don't match available, and build a list of
461  // incoming DomainValues that we want to merge.
462  SmallVector<DomainValue*,4> doms;
463  for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
464    int rx = *i;
465    DomainValue *dv = LiveRegs[rx];
466    // This useless DomainValue could have been missed above.
467    if (!dv->getCommonDomains(available)) {
468      kill(*i);
469      continue;
470    }
471    // sorted, uniqued insert.
472    bool inserted = false;
473    for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
474           i != e && !inserted; ++i) {
475      if (dv == *i)
476        inserted = true;
477      else if (dv->Dist < (*i)->Dist) {
478        inserted = true;
479        doms.insert(i, dv);
480      }
481    }
482    if (!inserted)
483      doms.push_back(dv);
484  }
485
486  // doms are now sorted in order of appearance. Try to merge them all, giving
487  // priority to the latest ones.
488  DomainValue *dv = 0;
489  while (!doms.empty()) {
490    if (!dv) {
491      dv = doms.pop_back_val();
492      continue;
493    }
494
495    DomainValue *latest = doms.pop_back_val();
496    if (merge(dv, latest)) continue;
497
498    // If latest didn't merge, it is useless now. Kill all registers using it.
499    for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
500      if (LiveRegs[*i] == latest)
501        kill(*i);
502  }
503
504  // dv is the DomainValue we are going to use for this instruction.
505  if (!dv)
506    dv = alloc();
507  dv->Dist = Distance;
508  dv->AvailableDomains = available;
509  dv->Instrs.push_back(mi);
510
511  // Finally set all defs and non-collapsed uses to dv.
512  for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
513    MachineOperand &mo = mi->getOperand(i);
514    if (!mo.isReg()) continue;
515    int rx = regIndex(mo.getReg());
516    if (rx < 0) continue;
517    if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
518      kill(rx);
519      setLiveReg(rx, dv);
520    }
521  }
522}
523
524void ExeDepsFix::visitGenericInstr(MachineInstr *mi) {
525  // Process explicit defs, kill any relevant registers redefined.
526  for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
527    MachineOperand &mo = mi->getOperand(i);
528    if (!mo.isReg()) continue;
529    int rx = regIndex(mo.getReg());
530    if (rx < 0) continue;
531    kill(rx);
532  }
533}
534
535bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
536  MF = &mf;
537  TII = MF->getTarget().getInstrInfo();
538  TRI = MF->getTarget().getRegisterInfo();
539  LiveRegs = 0;
540  Distance = 0;
541  assert(NumRegs == RC->getNumRegs() && "Bad regclass");
542
543  // If no relevant registers are used in the function, we can skip it
544  // completely.
545  bool anyregs = false;
546  for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
547       I != E; ++I)
548    if (MF->getRegInfo().isPhysRegUsed(*I)) {
549      anyregs = true;
550      break;
551    }
552  if (!anyregs) return false;
553
554  // Initialize the AliasMap on the first use.
555  if (AliasMap.empty()) {
556    // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC,
557    // or -1.
558    AliasMap.resize(TRI->getNumRegs(), -1);
559    for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
560      for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
561        AliasMap[*AI] = i;
562  }
563
564  MachineBasicBlock *Entry = MF->begin();
565  ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
566  SmallVector<MachineBasicBlock*, 16> Loops;
567  for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
568         MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
569    MachineBasicBlock *MBB = *MBBI;
570    if (enterBasicBlock(MBB))
571      Loops.push_back(MBB);
572    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
573        ++I)
574      visitInstr(I);
575    leaveBasicBlock(MBB);
576  }
577
578  // Visit all the loop blocks again in order to merge DomainValues from
579  // back-edges.
580  for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
581    MachineBasicBlock *MBB = Loops[i];
582    enterBasicBlock(MBB);
583    leaveBasicBlock(MBB);
584  }
585
586  // Clear the LiveOuts vectors and collapse any remaining DomainValues.
587  for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
588         MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
589    LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
590    if (FI == LiveOuts.end() || !FI->second)
591      continue;
592    for (unsigned i = 0, e = NumRegs; i != e; ++i)
593      if (FI->second[i])
594        release(FI->second[i]);
595    delete[] FI->second;
596  }
597  LiveOuts.clear();
598  Avail.clear();
599  Allocator.DestroyAll();
600
601  return false;
602}
603
604FunctionPass *
605llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
606  return new ExeDepsFix(RC);
607}
608