X86VZeroUpper.cpp revision bd00a934c653fb1666fa7d18267644b4e9d14e5e
//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids the transition latency
// penalty when transferring control between AVX encoded instructions and the
// old SSE encoding mode.
//
//===----------------------------------------------------------------------===//
1677349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek
17aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek#define DEBUG_TYPE "x86-vzeroupper"
1877349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek#include "X86.h"
1977349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek#include "X86InstrInfo.h"
20b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek#include "llvm/ADT/Statistic.h"
2177349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek#include "llvm/CodeGen/MachineFunctionPass.h"
22b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek#include "llvm/CodeGen/MachineInstrBuilder.h"
23b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek#include "llvm/CodeGen/MachineRegisterInfo.h"
24b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek#include "llvm/CodeGen/Passes.h"
25e5f4dcb6bd73a10df6eb6c3cfe057c88cb2362ccTed Kremenek#include "llvm/Support/Debug.h"
26b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek#include "llvm/Support/raw_ostream.h"
27aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek#include "llvm/Target/TargetInstrInfo.h"
28b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenekusing namespace llvm;
29b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
30f116bd654bcdb5d7c22656f224deeb7a67f7d0cdTed KremenekSTATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
31aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek
32aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremeneknamespace {
33aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek  struct VZeroUpperInserter : public MachineFunctionPass {
34330dddd19406f9cc227e59e0bb0a36ecdc52915eTed Kremenek    static char ID;
3577349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek    VZeroUpperInserter() : MachineFunctionPass(ID) {}
36b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
37b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    virtual bool runOnMachineFunction(MachineFunction &MF);
38b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
39b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
40b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
41b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    virtual const char *getPassName() const { return "X86 vzeroupper inserter";}
42b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
43b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  private:
44b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    const TargetInstrInfo *TII; // Machine instruction info.
45b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    MachineBasicBlock *MBB;     // Current basic block
46b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
47b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    // Any YMM register live-in to this function?
48b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    bool FnHasLiveInYmm;
49b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
50b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    // BBState - Contains the state of each MBB: unknown, clean, dirty
51aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek    SmallVector<uint8_t, 8> BBState;
52240f1f00dda1d481276ea872fe8f8851581a7e6bTed Kremenek
53b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    // BBSolved - Keep track of all MBB which had been already analyzed
54b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    // and there is no further processing required.
55aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek    BitVector BBSolved;
56b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
57b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    // Machine Basic Blocks are classified according this pass:
58b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    //
59b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    //  ST_UNKNOWN - The MBB state is unknown, meaning from the entry state
60b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    //    until the MBB exit there isn't a instruction using YMM to change
61b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    //    the state to dirty, or one of the incoming predecessors is unknown
62b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    //    and there's not a dirty predecessor between them.
63b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    //
64b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    //  ST_CLEAN - No YMM usage in the end of the MBB. A MBB could have
65b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    //    instructions using YMM and be marked ST_CLEAN, as long as the state
66affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek    //    is cleaned by a vzeroupper before any call.
674a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek    //
684a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek    //  ST_DIRTY - Any MBB ending with a YMM usage not cleaned up by a
69affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek    //    vzeroupper instruction.
705e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek    //
711e80aa49ec689d1937e54fb353d6626e0a58f0dbTed Kremenek    //  ST_INIT - Placeholder for an empty state set
72affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek    //
73affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek    enum {
748cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek      ST_UNKNOWN = 0,
758cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek      ST_CLEAN   = 1,
76b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek      ST_DIRTY   = 2,
774a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek      ST_INIT    = 3
784a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek    };
794a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek
80b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    // computeState - Given two states, compute the resulting state, in
814a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek    // the following way
824a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek    //
834a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek    //  1) One dirty state yields another dirty state
849dca062461a6244cf0f733346657fa3eee853f9bTed Kremenek    //  2) All states must be clean for the result to be clean
85affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek    //  3) If none above and one unknown, the result state is also unknown
86affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek    //
87affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek    unsigned computeState(unsigned PrevState, unsigned CurState) {
88affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek      if (PrevState == ST_INIT)
89b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek        return CurState;
9007d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek
91b5339121f63f2754d4f26e8f3a092caf9f7d9290Ted Kremenek      if (PrevState == ST_DIRTY || CurState == ST_DIRTY)
9207d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek        return ST_DIRTY;
9307d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek
9407d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek      if (PrevState == ST_CLEAN && CurState == ST_CLEAN)
95b5339121f63f2754d4f26e8f3a092caf9f7d9290Ted Kremenek        return ST_CLEAN;
9607d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek
9707d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek      return ST_UNKNOWN;
984a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek    }
994a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek
10007d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek  };
1014d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  char VZeroUpperInserter::ID = 0;
1024d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek}
1034d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek
1044d839b4949efe9e2b16eeab679c25b28e31ea742Ted KremenekFunctionPass *llvm::createX86IssueVZeroUpperPass() {
1054d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  return new VZeroUpperInserter();
1064d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek}
1074d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek
1084d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenekstatic bool isYmmReg(unsigned Reg) {
1094d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  if (Reg >= X86::YMM0 && Reg <= X86::YMM15)
110b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    return true;
1118cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek
1128cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek  return false;
1138cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek}
1148cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek
1155e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenekstatic bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
1165e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek  for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
1175e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek       E = MRI.livein_end(); I != E; ++I)
1185e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek    if (isYmmReg(I->first))
1192ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek      return true;
1202ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek
1212ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek  return false;
1222ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek}
123b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
124b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenekstatic bool hasYmmReg(MachineInstr *MI) {
125b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
126b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    const MachineOperand &MO = MI->getOperand(i);
127b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    if (!MO.isReg())
128b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek      continue;
129b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    if (MO.isDebug())
130240f1f00dda1d481276ea872fe8f8851581a7e6bTed Kremenek      continue;
131b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    if (isYmmReg(MO.getReg()))
132b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek      return true;
133b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  }
134b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  return false;
135b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek}
136b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
137b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek/// runOnMachineFunction - Loop over all of the basic blocks, inserting
138b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek/// vzero upper instructions before function calls.
139b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenekbool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
140b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  TII = MF.getTarget().getInstrInfo();
141b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  MachineRegisterInfo &MRI = MF.getRegInfo();
142b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  bool EverMadeChange = false;
143b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
144b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  // Fast check: if the function doesn't use any ymm registers, we don't need
145b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  // to insert any VZEROUPPER instructions.  This is constant-time, so it is
146b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  // cheap in the common case of no ymm use.
147b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  bool YMMUsed = false;
148b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  TargetRegisterClass *RC = X86::VR256RegisterClass;
149b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
150e01c98767dfd7153c3c84637c36659e3bbe16ff7Ted Kremenek       i != e; i++) {
151e01c98767dfd7153c3c84637c36659e3bbe16ff7Ted Kremenek    if (MRI.isPhysRegUsed(*i)) {
152ffe0f43806d4823271c2406c1fccc2373115c36aTed Kremenek      YMMUsed = true;
153e01c98767dfd7153c3c84637c36659e3bbe16ff7Ted Kremenek      break;
154b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    }
155b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  }
156aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek  if (!YMMUsed)
157b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    return EverMadeChange;
1584a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek
1594a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek  // Pre-compute the existence of any live-in YMM registers to this function
160b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
161b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
1624a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek  assert(BBState.empty());
1634a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek  BBState.resize(MF.getNumBlockIDs(), 0);
1649dca062461a6244cf0f733346657fa3eee853f9bTed Kremenek  BBSolved.resize(MF.getNumBlockIDs(), 0);
1659dca062461a6244cf0f733346657fa3eee853f9bTed Kremenek
166b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  // Each BB state depends on all predecessors, loop over until everything
167b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  // converges.  (Once we converge, we can implicitly mark everything that is
168b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  // still ST_UNKNOWN as ST_CLEAN.)
169b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  while (1) {
170b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    bool MadeChange = false;
171b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
172b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    // Process all basic blocks.
173b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
1744a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek      MadeChange |= processBasicBlock(MF, *I);
1754a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek
176b5339121f63f2754d4f26e8f3a092caf9f7d9290Ted Kremenek    // If this iteration over the code changed anything, keep iterating.
177b5339121f63f2754d4f26e8f3a092caf9f7d9290Ted Kremenek    if (!MadeChange) break;
1784d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek    EverMadeChange = true;
1794d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  }
1804d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek
1814d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  BBState.clear();
1824d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  BBSolved.clear();
1834d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  return EverMadeChange;
184d763eb91aab5bdecd11825fadb35d6d8cc905f63Ted Kremenek}
185d763eb91aab5bdecd11825fadb35d6d8cc905f63Ted Kremenek
186affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek/// processBasicBlock - Loop over all of the instructions in the basic block,
187affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek/// inserting vzero upper instructions before function calls.
188affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenekbool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
1898cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek                                           MachineBasicBlock &BB) {
1908cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek  bool Changed = false;
1918cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek  unsigned BBNum = BB.getNumber();
1928cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek  MBB = &BB;
1938cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek
1945e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek  // Don't process already solved BBs
1955e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek  if (BBSolved[BBNum])
1965e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek    return false; // No changes
1975e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek
1982ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek  // Check the state of all predecessors
1991e80aa49ec689d1937e54fb353d6626e0a58f0dbTed Kremenek  unsigned EntryState = ST_INIT;
2001e80aa49ec689d1937e54fb353d6626e0a58f0dbTed Kremenek  for (MachineBasicBlock::const_pred_iterator PI = BB.pred_begin(),
2012ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek       PE = BB.pred_end(); PI != PE; ++PI) {
2022ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek    EntryState = computeState(EntryState, BBState[(*PI)->getNumber()]);
203d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek    if (EntryState == ST_DIRTY)
204d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek      break;
205d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek  }
206d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek
2074a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek
2084a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek  // The entry MBB for the function may set the inital state to dirty if
2094a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek  // the function receives any YMM incoming arguments
210d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek  if (MBB == MF.begin()) {
211d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek    EntryState = ST_CLEAN;
2124d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek    if (FnHasLiveInYmm)
2134d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek      EntryState = ST_DIRTY;
2144d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  }
2154d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek
2164d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  // The current state is initialized according to the predecessors
2174d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  unsigned CurState = EntryState;
2184d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  bool BBHasCall = false;
2194d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek
2204d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
2214d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek    MachineInstr *MI = I;
2224d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek    DebugLoc dl = I->getDebugLoc();
2234d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek    bool isControlFlow = MI->getDesc().isCall() || MI->getDesc().isReturn();
2244d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek
2254d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek    // Shortcut: don't need to check regular instructions in dirty state.
2264d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek    if (!isControlFlow && CurState == ST_DIRTY)
2274d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek      continue;
228b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
2298cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek    if (hasYmmReg(MI)) {
2308cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek      // We found a ymm-using instruction; this could be an AVX instruction,
2318cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek      // or it could be control flow.
2325e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek      CurState = ST_DIRTY;
2335e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek      continue;
2345e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek    }
2355e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek
2368cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek    // Check for control-flow out of the current function (which might
2372ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek    // indirectly execute SSE instructions).
2382ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek    if (!isControlFlow)
2392ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek      continue;
2402ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek
241b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    BBHasCall = true;
2426a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek
243b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
244b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    // registers. This instruction has zero latency. In addition, the processor
2456a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek    // changes back to Clean state, after which execution of Intel SSE
2466a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek    // instructions or Intel AVX instructions has no transition penalty. Add
2476a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek    // the VZEROUPPER instruction before any function call/return that might
2486a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek    // execute SSE code.
2496a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek    // FIXME: In some cases, we may want to move the VZEROUPPER into a
250b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    // predecessor block.
251b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    if (CurState == ST_DIRTY) {
252b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek      // Only insert the VZEROUPPER in case the entry state isn't unknown.
253b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek      // When unknown, only compute the information within the block to have
254b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek      // it available in the exit if possible, but don't change the block.
255b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek      if (EntryState != ST_UNKNOWN) {
256b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek        BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
257b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek        ++NumVZU;
258b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek      }
259b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
26005a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek      // After the inserted VZEROUPPER the state becomes clean again, but
26105a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek      // other YMM may appear before other subsequent calls or even before
26205a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek      // the end of the BB.
263b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek      CurState = ST_CLEAN;
264b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    }
265b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  }
266b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
267b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  DEBUG(dbgs() << "MBB #" << BBNum
268aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek               << ", current state: " << CurState << '\n');
269b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek
270b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  // A BB can only be considered solved when we both have done all the
271b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  // necessary transformations, and have computed the exit state.  This happens
272aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek  // in two cases:
273b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  //  1) We know the entry state: this immediately implies the exit state and
274aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek  //     all the necessary transformations.
275aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek  //  2) There are no calls, and and a non-call instruction marks this block:
276b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek  //     no transformations are necessary, and we know the exit state.
27705a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek  if (EntryState != ST_UNKNOWN || (!BBHasCall && CurState != ST_UNKNOWN))
278aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek    BBSolved[BBNum] = true;
2795a7b3821c6abed7f58a53a94eac128bd23d23289Ted Kremenek
28005a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek  if (CurState != BBState[BBNum])
281b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    Changed = true;
282aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek
283aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek  BBState[BBNum] = CurState;
284aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek  return Changed;
285aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek}
286aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek