X86VZeroUpper.cpp revision a20e1e7ef596842127794372244fd5c646f71296
//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids transition latency
// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
//
//===----------------------------------------------------------------------===//
163bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
17bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#define DEBUG_TYPE "x86-vzeroupper"
183bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "X86.h"
193bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "X86InstrInfo.h"
203bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/ADT/Statistic.h"
213bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/MachineFunctionPass.h"
223bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/MachineInstrBuilder.h"
23bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#include "llvm/CodeGen/MachineRegisterInfo.h"
243bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/Passes.h"
25bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#include "llvm/Support/Debug.h"
26bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#include "llvm/Support/raw_ostream.h"
273bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/Target/TargetInstrInfo.h"
283bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesusing namespace llvm;
293bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
303bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso LopesSTATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
313bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
323bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesnamespace {
333bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  struct VZeroUpperInserter : public MachineFunctionPass {
343bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    static char ID;
353bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    VZeroUpperInserter() : MachineFunctionPass(ID) {}
363bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
373bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    virtual bool runOnMachineFunction(MachineFunction &MF);
383bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
393bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
403bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
413bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    virtual const char *getPassName() const { return "X86 vzeroupper inserter";}
423bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
433bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  private:
443bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    const TargetInstrInfo *TII; // Machine instruction info.
453bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    MachineBasicBlock *MBB;     // Current basic block
46bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
47bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // Any YMM register live-in to this function?
48bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    bool FnHasLiveInYmm;
49bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
50bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // BBState - Contains the state of each MBB: unknown, clean, dirty
51bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    SmallVector<uint8_t, 8> BBState;
52bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
53bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // BBSolved - Keep track of all MBB which had been already analyzed
54bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // and there is no further processing required.
55bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    BitVector BBSolved;
56bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
57bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // Machine Basic Blocks are classified according this pass:
58bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
59bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  ST_UNKNOWN - The MBB state is unknown, meaning from the entry state
60bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    until the MBB exit there isn't a instruction using YMM to change
61bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    the state to dirty, or one of the incoming predecessors is unknown
62bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    and there's not a dirty predecessor between them.
63bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
64bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  ST_CLEAN - No YMM usage in the end of the MBB. A MBB could have
65bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    instructions using YMM and be marked ST_CLEAN, as long as the state
66bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    is cleaned by a vzeroupper before any call.
67bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
68bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  ST_DIRTY - Any MBB ending with a YMM usage not cleaned up by a
69bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //    vzeroupper instruction.
70bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
71bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  ST_INIT - Placeholder for an empty state set
72bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
73bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    enum {
74bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      ST_UNKNOWN = 0,
75bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      ST_CLEAN   = 1,
76bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      ST_DIRTY   = 2,
77bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      ST_INIT    = 3
78bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    };
79bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
80bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // computeState - Given two states, compute the resulting state, in
81bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // the following way
82bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
83bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  1) One dirty state yields another dirty state
84bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  2) All states must be clean for the result to be clean
85bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //  3) If none above and one unknown, the result state is also unknown
86bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    //
87bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    unsigned computeState(unsigned PrevState, unsigned CurState) {
88bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      if (PrevState == ST_INIT)
89bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman        return CurState;
90bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
91bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      if (PrevState == ST_DIRTY || CurState == ST_DIRTY)
92bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman        return ST_DIRTY;
93bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
94bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      if (PrevState == ST_CLEAN && CurState == ST_CLEAN)
95bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman        return ST_CLEAN;
96bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
97bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      return ST_UNKNOWN;
98bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    }
99bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
1003bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  };
1013bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  char VZeroUpperInserter::ID = 0;
1023bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
1033bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
1043bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso LopesFunctionPass *llvm::createX86IssueVZeroUpperPass() {
1053bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  return new VZeroUpperInserter();
1063bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
1073bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
108bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedmanstatic bool isYmmReg(unsigned Reg) {
109bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  if (Reg >= X86::YMM0 && Reg <= X86::YMM15)
110bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    return true;
111bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
112bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  return false;
113bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman}
114bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
115bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedmanstatic bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
116bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
117bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman       E = MRI.livein_end(); I != E; ++I)
118bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (isYmmReg(I->first))
119bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      return true;
120bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
121bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  return false;
122bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman}
123bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
124bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedmanstatic bool hasYmmReg(MachineInstr *MI) {
125bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
126bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    const MachineOperand &MO = MI->getOperand(i);
127bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (!MO.isReg())
128bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      continue;
129bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (MO.isDebug())
130bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      continue;
131bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (isYmmReg(MO.getReg()))
132bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      return true;
133bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  }
134bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  return false;
135bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman}
136bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
1373bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// runOnMachineFunction - Loop over all of the basic blocks, inserting
1383bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// vzero upper instructions before function calls.
1393bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesbool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
1403bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  TII = MF.getTarget().getInstrInfo();
141bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  MachineRegisterInfo &MRI = MF.getRegInfo();
142bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  bool EverMadeChange = false;
1433bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
144bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // Fast check: if the function doesn't use any ymm registers, we don't need
145bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // to insert any VZEROUPPER instructions.  This is constant-time, so it is
146bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // cheap in the common case of no ymm use.
147bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  bool YMMUsed = false;
148c909950c384e8234a7b3c5a76b7f79e3f7012cebCraig Topper  const TargetRegisterClass *RC = &X86::VR256RegClass;
149bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
150bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman       i != e; i++) {
151bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (MRI.isPhysRegUsed(*i)) {
152bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      YMMUsed = true;
153bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      break;
154bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    }
155bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  }
156bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  if (!YMMUsed)
157bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    return EverMadeChange;
1583bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
159bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // Pre-compute the existence of any live-in YMM registers to this function
160bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
1613bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
162bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  assert(BBState.empty());
163bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  BBState.resize(MF.getNumBlockIDs(), 0);
164bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  BBSolved.resize(MF.getNumBlockIDs(), 0);
1653bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
166bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // Each BB state depends on all predecessors, loop over until everything
167bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // converges.  (Once we converge, we can implicitly mark everything that is
168bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // still ST_UNKNOWN as ST_CLEAN.)
169bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  while (1) {
170bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    bool MadeChange = false;
1713bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
172bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // Process all basic blocks.
173bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
174bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      MadeChange |= processBasicBlock(MF, *I);
1753bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
176bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // If this iteration over the code changed anything, keep iterating.
177bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (!MadeChange) break;
178bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    EverMadeChange = true;
1793bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  }
180bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
181bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  BBState.clear();
182bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  BBSolved.clear();
183bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  return EverMadeChange;
1843bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
1853bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
1863bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// processBasicBlock - Loop over all of the instructions in the basic block,
1873bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// inserting vzero upper instructions before function calls.
1883bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesbool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
1893bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes                                           MachineBasicBlock &BB) {
1903bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  bool Changed = false;
191bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  unsigned BBNum = BB.getNumber();
1923bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  MBB = &BB;
1933bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
194bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // Don't process already solved BBs
195bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  if (BBSolved[BBNum])
196bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    return false; // No changes
197bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
198bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // Check the state of all predecessors
199bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  unsigned EntryState = ST_INIT;
200bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  for (MachineBasicBlock::const_pred_iterator PI = BB.pred_begin(),
201bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman       PE = BB.pred_end(); PI != PE; ++PI) {
202bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    EntryState = computeState(EntryState, BBState[(*PI)->getNumber()]);
203bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (EntryState == ST_DIRTY)
204bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      break;
205bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  }
206bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
207bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
208d9b0b025612992a0b724eeca8bdf10b1d7a5c355Benjamin Kramer  // The entry MBB for the function may set the initial state to dirty if
209bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // the function receives any YMM incoming arguments
210bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  if (MBB == MF.begin()) {
211bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    EntryState = ST_CLEAN;
212bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (FnHasLiveInYmm)
213bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      EntryState = ST_DIRTY;
214bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  }
215bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
216bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // The current state is initialized according to the predecessors
217bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  unsigned CurState = EntryState;
218bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  bool BBHasCall = false;
219bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
2203bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
2213bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    MachineInstr *MI = I;
2223bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    DebugLoc dl = I->getDebugLoc();
2235a96b3dad2f634c9081c8b2b6c2575441dc5a2bdEvan Cheng    bool isControlFlow = MI->isCall() || MI->isReturn();
224bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
225a20e1e7ef596842127794372244fd5c646f71296Chad Rosier    // Shortcut: don't need to check regular instructions in dirty state.
226bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (!isControlFlow && CurState == ST_DIRTY)
227bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      continue;
228bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
229bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (hasYmmReg(MI)) {
230bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // We found a ymm-using instruction; this could be an AVX instruction,
231bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // or it could be control flow.
232bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      CurState = ST_DIRTY;
233bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      continue;
234bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    }
2353bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
236bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // Check for control-flow out of the current function (which might
237bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // indirectly execute SSE instructions).
238bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (!isControlFlow)
239bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      continue;
240bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
241bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    BBHasCall = true;
242bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
243bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
244bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // registers. This instruction has zero latency. In addition, the processor
245bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // changes back to Clean state, after which execution of Intel SSE
246bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // instructions or Intel AVX instructions has no transition penalty. Add
247bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // the VZEROUPPER instruction before any function call/return that might
248bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // execute SSE code.
249bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // FIXME: In some cases, we may want to move the VZEROUPPER into a
250bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    // predecessor block.
251bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    if (CurState == ST_DIRTY) {
252bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // Only insert the VZEROUPPER in case the entry state isn't unknown.
253bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // When unknown, only compute the information within the block to have
254bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // it available in the exit if possible, but don't change the block.
255bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      if (EntryState != ST_UNKNOWN) {
256bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman        BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
257bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman        ++NumVZU;
258bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      }
259bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
260bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // After the inserted VZEROUPPER the state becomes clean again, but
261bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // other YMM may appear before other subsequent calls or even before
262bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      // the end of the BB.
263bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman      CurState = ST_CLEAN;
2643bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    }
2653bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  }
2663bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
267bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  DEBUG(dbgs() << "MBB #" << BBNum
268bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman               << ", current state: " << CurState << '\n');
269bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
270bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // A BB can only be considered solved when we both have done all the
271bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // necessary transformations, and have computed the exit state.  This happens
272bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  // in two cases:
273bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  //  1) We know the entry state: this immediately implies the exit state and
274bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  //     all the necessary transformations.
275bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  //  2) There are no calls, and and a non-call instruction marks this block:
276bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  //     no transformations are necessary, and we know the exit state.
277bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  if (EntryState != ST_UNKNOWN || (!BBHasCall && CurState != ST_UNKNOWN))
278bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    BBSolved[BBNum] = true;
279bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
280bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  if (CurState != BBState[BBNum])
281bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman    Changed = true;
282bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman
283bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman  BBState[BBNum] = CurState;
2843bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  return Changed;
2853bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
286