X86VZeroUpper.cpp revision 2990853ea8bf4888b179ac6c493836b83769e87b
//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids transition latency
// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
//
//===----------------------------------------------------------------------===//
163bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
17bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#define DEBUG_TYPE "x86-vzeroupper"
183bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "X86.h"
193bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "X86InstrInfo.h"
203bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/ADT/Statistic.h"
213bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/MachineFunctionPass.h"
223bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/MachineInstrBuilder.h"
23bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#include "llvm/CodeGen/MachineRegisterInfo.h"
243bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/Passes.h"
25bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#include "llvm/Support/Debug.h"
26bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#include "llvm/Support/raw_ostream.h"
273bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/Target/TargetInstrInfo.h"
283bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesusing namespace llvm;
293bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
303bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso LopesSTATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
313bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
323bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesnamespace {
333bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  struct VZeroUpperInserter : public MachineFunctionPass {
343bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    static char ID;
353bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    VZeroUpperInserter() : MachineFunctionPass(ID) {}
363bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
373bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    virtual bool runOnMachineFunction(MachineFunction &MF);
383bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
393bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
403bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
413bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    virtual const char *getPassName() const { return "X86 vzeroupper inserter";}
423bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
433bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  private:
443bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    const TargetInstrInfo *TII; // Machine instruction info.
45bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
46bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // Any YMM register live-in to this function?
47bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    bool FnHasLiveInYmm;
48bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
49bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // BBState - Contains the state of each MBB: unknown, clean, dirty
50bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    SmallVector<uint8_t, 8> BBState;
51bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
52bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // BBSolved - Keep track of all MBB which had been already analyzed
53bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // and there is no further processing required.
54bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    BitVector BBSolved;
55bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
56bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // Machine Basic Blocks are classified according this pass:
57bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
58bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  ST_UNKNOWN - The MBB state is unknown, meaning from the entry state
59bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    until the MBB exit there isn't a instruction using YMM to change
60bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    the state to dirty, or one of the incoming predecessors is unknown
61bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    and there's not a dirty predecessor between them.
62bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
63bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  ST_CLEAN - No YMM usage in the end of the MBB. A MBB could have
64bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    instructions using YMM and be marked ST_CLEAN, as long as the state
65bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    is cleaned by a vzeroupper before any call.
66bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
67bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  ST_DIRTY - Any MBB ending with a YMM usage not cleaned up by a
68bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    vzeroupper instruction.
69bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
70bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  ST_INIT - Placeholder for an empty state set
71bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
72bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    enum {
73bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      ST_UNKNOWN = 0,
74bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      ST_CLEAN   = 1,
75bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      ST_DIRTY   = 2,
76bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      ST_INIT    = 3
77bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    };
78bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
79bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // computeState - Given two states, compute the resulting state, in
80bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // the following way
81bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
82bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  1) One dirty state yields another dirty state
83bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  2) All states must be clean for the result to be clean
84bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  3) If none above and one unknown, the result state is also unknown
85bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
86f7c4d26f77465d731054021ba6bdde556e9f25b7Craig Topper    static unsigned computeState(unsigned PrevState, unsigned CurState) {
87bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      if (PrevState == ST_INIT)
88bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman        return CurState;
89bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
90bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      if (PrevState == ST_DIRTY || CurState == ST_DIRTY)
91bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman        return ST_DIRTY;
92bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
93bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      if (PrevState == ST_CLEAN && CurState == ST_CLEAN)
94bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman        return ST_CLEAN;
95bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
96bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      return ST_UNKNOWN;
97bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    }
98bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
993bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  };
1003bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  char VZeroUpperInserter::ID = 0;
1013bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
1023bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
1033bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso LopesFunctionPass *llvm::createX86IssueVZeroUpperPass() {
1043bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  return new VZeroUpperInserter();
1053bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
1063bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
107bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedmanstatic bool isYmmReg(unsigned Reg) {
108e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovsky  return (Reg >= X86::YMM0 && Reg <= X86::YMM31);
109e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovsky}
110bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
111e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovskystatic bool isZmmReg(unsigned Reg) {
112e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovsky  return (Reg >= X86::ZMM0 && Reg <= X86::ZMM31);
113bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman}
114bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
115bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedmanstatic bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
116bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
117bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman       E = MRI.livein_end(); I != E; ++I)
118e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovsky    if (isYmmReg(I->first) || isZmmReg(I->first))
119bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      return true;
120bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
121bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  return false;
122bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman}
123bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
124d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovskystatic bool clobbersAllYmmRegs(const MachineOperand &MO) {
125e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovsky  for (unsigned reg = X86::YMM0; reg < X86::YMM31; ++reg) {
126e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovsky    if (!MO.clobbersPhysReg(reg))
127e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovsky      return false;
128e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovsky  }
129e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovsky  for (unsigned reg = X86::ZMM0; reg < X86::ZMM31; ++reg) {
130d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky    if (!MO.clobbersPhysReg(reg))
131d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky      return false;
132d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky  }
133d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky  return true;
134d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky}
135d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky
136bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedmanstatic bool hasYmmReg(MachineInstr *MI) {
137df8de92083e9cc97999e9f2f7bc7ef1df9ac6258Craig Topper  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
138bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    const MachineOperand &MO = MI->getOperand(i);
139d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky    if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
140d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky      return true;
141bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (!MO.isReg())
142bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      continue;
143bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (MO.isDebug())
144bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      continue;
145bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (isYmmReg(MO.getReg()))
146bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      return true;
147bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  }
148bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  return false;
149bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman}
150bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
1512990853ea8bf4888b179ac6c493836b83769e87bBill Wendling/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this
1522990853ea8bf4888b179ac6c493836b83769e87bBill Wendling/// instruction.
1532990853ea8bf4888b179ac6c493836b83769e87bBill Wendlingstatic bool clobbersAnyYmmReg(MachineInstr *MI) {
1542990853ea8bf4888b179ac6c493836b83769e87bBill Wendling  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1552990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    const MachineOperand &MO = MI->getOperand(i);
1562990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    if (!MO.isRegMask())
1572990853ea8bf4888b179ac6c493836b83769e87bBill Wendling      continue;
1582990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    for (unsigned reg = X86::YMM0; reg < X86::YMM31; ++reg) {
1592990853ea8bf4888b179ac6c493836b83769e87bBill Wendling      if (MO.clobbersPhysReg(reg))
1602990853ea8bf4888b179ac6c493836b83769e87bBill Wendling        return true;
1612990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    }
1622990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    for (unsigned reg = X86::ZMM0; reg < X86::ZMM31; ++reg) {
1632990853ea8bf4888b179ac6c493836b83769e87bBill Wendling      if (MO.clobbersPhysReg(reg))
1642990853ea8bf4888b179ac6c493836b83769e87bBill Wendling        return true;
1652990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    }
1662990853ea8bf4888b179ac6c493836b83769e87bBill Wendling  }
1672990853ea8bf4888b179ac6c493836b83769e87bBill Wendling  return false;
1682990853ea8bf4888b179ac6c493836b83769e87bBill Wendling}
1692990853ea8bf4888b179ac6c493836b83769e87bBill Wendling
1703bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// runOnMachineFunction - Loop over all of the basic blocks, inserting
1713bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// vzero upper instructions before function calls.
1723bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesbool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
1733bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  TII = MF.getTarget().getInstrInfo();
174bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  MachineRegisterInfo &MRI = MF.getRegInfo();
175bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  bool EverMadeChange = false;
1763bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
177bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // Fast check: if the function doesn't use any ymm registers, we don't need
178bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // to insert any VZEROUPPER instructions.  This is constant-time, so it is
179bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // cheap in the common case of no ymm use.
180bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  bool YMMUsed = false;
181c909950c384e8234a7b3c5a76b7f79e3f7012cebCraig Topper  const TargetRegisterClass *RC = &X86::VR256RegClass;
182bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
183bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman       i != e; i++) {
1849d57cdfa172c1af3712ef52862a1976c57c679a5Jakob Stoklund Olesen    if (!MRI.reg_nodbg_empty(*i)) {
185bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      YMMUsed = true;
186bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      break;
187bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    }
188bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  }
189bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  if (!YMMUsed)
190bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    return EverMadeChange;
1913bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
192bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // Pre-compute the existence of any live-in YMM registers to this function
193bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
1943bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
195bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  assert(BBState.empty());
196bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  BBState.resize(MF.getNumBlockIDs(), 0);
197bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  BBSolved.resize(MF.getNumBlockIDs(), 0);
1983bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
199bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // Each BB state depends on all predecessors, loop over until everything
200bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // converges.  (Once we converge, we can implicitly mark everything that is
201bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // still ST_UNKNOWN as ST_CLEAN.)
202bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  while (1) {
203bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    bool MadeChange = false;
2043bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
205bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // Process all basic blocks.
206bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
207bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      MadeChange |= processBasicBlock(MF, *I);
2083bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
209bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // If this iteration over the code changed anything, keep iterating.
210bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (!MadeChange) break;
211bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    EverMadeChange = true;
2123bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  }
213bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
214bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  BBState.clear();
215bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  BBSolved.clear();
216bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  return EverMadeChange;
2173bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
2183bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
2193bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// processBasicBlock - Loop over all of the instructions in the basic block,
2203bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// inserting vzero upper instructions before function calls.
2213bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesbool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
2223bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes                                           MachineBasicBlock &BB) {
2233bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  bool Changed = false;
224bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  unsigned BBNum = BB.getNumber();
2253bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
226bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // Don't process already solved BBs
227bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  if (BBSolved[BBNum])
228bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    return false; // No changes
229bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
230bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // Check the state of all predecessors
231bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  unsigned EntryState = ST_INIT;
232bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  for (MachineBasicBlock::const_pred_iterator PI = BB.pred_begin(),
233bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman       PE = BB.pred_end(); PI != PE; ++PI) {
234bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    EntryState = computeState(EntryState, BBState[(*PI)->getNumber()]);
235bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (EntryState == ST_DIRTY)
236bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      break;
237bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  }
238bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
239bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
240d9b0b025612992a0b724eeca8bdf10b1d7a5c355Benjamin Kramer  // The entry MBB for the function may set the initial state to dirty if
241bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // the function receives any YMM incoming arguments
242df8de92083e9cc97999e9f2f7bc7ef1df9ac6258Craig Topper  if (&BB == MF.begin()) {
243bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    EntryState = ST_CLEAN;
244bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (FnHasLiveInYmm)
245bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      EntryState = ST_DIRTY;
246bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  }
247bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
248bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // The current state is initialized according to the predecessors
249bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  unsigned CurState = EntryState;
250bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  bool BBHasCall = false;
251bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
2523bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
2533bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    DebugLoc dl = I->getDebugLoc();
254531f025361555e7a695eb559ec02645c054ee146Michael Liao    MachineInstr *MI = I;
255531f025361555e7a695eb559ec02645c054ee146Michael Liao
2565a96b3dad2f634c9081c8b2b6c2575441dc5a2bdEvan Cheng    bool isControlFlow = MI->isCall() || MI->isReturn();
257bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
258a20e1e7ef596842127794372244fd5c646f71296Chad Rosier    // Shortcut: don't need to check regular instructions in dirty state.
259bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (!isControlFlow && CurState == ST_DIRTY)
260bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      continue;
261bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
262bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (hasYmmReg(MI)) {
263bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // We found a ymm-using instruction; this could be an AVX instruction,
264bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // or it could be control flow.
265bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      CurState = ST_DIRTY;
266bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      continue;
267bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    }
2683bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
269bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // Check for control-flow out of the current function (which might
270bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // indirectly execute SSE instructions).
271bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (!isControlFlow)
272bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      continue;
273bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
2742990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    // If the call won't clobber any YMM register, skip it as well. It usually
2752990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    // happens on helper function calls (such as '_chkstk', '_ftol2') where
2762990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    // standard calling convention is not used (RegMask is not used to mark
2772990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    // register clobbered and register usage (def/imp-def/use) is well-dfined
2782990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    // and explicitly specified.
2792990853ea8bf4888b179ac6c493836b83769e87bBill Wendling    if (MI->isCall() && !clobbersAnyYmmReg(MI))
2802990853ea8bf4888b179ac6c493836b83769e87bBill Wendling      continue;
2812990853ea8bf4888b179ac6c493836b83769e87bBill Wendling
282bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    BBHasCall = true;
283bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
284bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
285bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // registers. This instruction has zero latency. In addition, the processor
286bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // changes back to Clean state, after which execution of Intel SSE
287bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // instructions or Intel AVX instructions has no transition penalty. Add
288bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // the VZEROUPPER instruction before any function call/return that might
289bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // execute SSE code.
290bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // FIXME: In some cases, we may want to move the VZEROUPPER into a
291bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // predecessor block.
292bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (CurState == ST_DIRTY) {
293bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // Only insert the VZEROUPPER in case the entry state isn't unknown.
294bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // When unknown, only compute the information within the block to have
295bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // it available in the exit if possible, but don't change the block.
296bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      if (EntryState != ST_UNKNOWN) {
297df8de92083e9cc97999e9f2f7bc7ef1df9ac6258Craig Topper        BuildMI(BB, I, dl, TII->get(X86::VZEROUPPER));
298bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman        ++NumVZU;
299bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      }
300bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
301bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // After the inserted VZEROUPPER the state becomes clean again, but
302bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // other YMM may appear before other subsequent calls or even before
303bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // the end of the BB.
304bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      CurState = ST_CLEAN;
3053bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    }
3063bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  }
3073bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
308bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  DEBUG(dbgs() << "MBB #" << BBNum
309bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman               << ", current state: " << CurState << '\n');
310bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
311bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // A BB can only be considered solved when we both have done all the
312bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // necessary transformations, and have computed the exit state.  This happens
313bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // in two cases:
314bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  //  1) We know the entry state: this immediately implies the exit state and
315bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  //     all the necessary transformations.
316bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  //  2) There are no calls, and and a non-call instruction marks this block:
317bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  //     no transformations are necessary, and we know the exit state.
318bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  if (EntryState != ST_UNKNOWN || (!BBHasCall && CurState != ST_UNKNOWN))
319bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    BBSolved[BBNum] = true;
320bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
321bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  if (CurState != BBState[BBNum])
322bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    Changed = true;
323bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
324bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  BBState[BBNum] = CurState;
3253bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  return Changed;
3263bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
327