//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids transition latency
// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
143bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes// 153bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes//===----------------------------------------------------------------------===// 163bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 173bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "X86.h" 183bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "X86InstrInfo.h" 1936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "X86Subtarget.h" 203bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/ADT/Statistic.h" 213bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/MachineFunctionPass.h" 223bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/MachineInstrBuilder.h" 23bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#include "llvm/CodeGen/MachineRegisterInfo.h" 243bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/Passes.h" 25bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#include "llvm/Support/Debug.h" 26bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman#include "llvm/Support/raw_ostream.h" 273bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/Target/TargetInstrInfo.h" 283bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesusing namespace llvm; 293bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 30dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "x86-vzeroupper" 31dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 323bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso LopesSTATISTIC(NumVZU, "Number of vzeroupper instructions inserted"); 333bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 343bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesnamespace { 353bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 3636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines class VZeroUpperInserter : public 
MachineFunctionPass { 3736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines public: 383bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 3936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines VZeroUpperInserter() : MachineFunctionPass(ID) {} 4036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool runOnMachineFunction(MachineFunction &MF) override; 4136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const char *getPassName() const override {return "X86 vzeroupper inserter";} 423bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 433bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes private: 44bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 4536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void processBasicBlock(MachineBasicBlock &MBB); 4636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void insertVZeroUpper(MachineBasicBlock::iterator I, 4736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock &MBB); 4836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void addDirtySuccessor(MachineBasicBlock &MBB); 4936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 5036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines typedef enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY } BlockExitState; 5136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines static const char* getBlockExitStateName(BlockExitState ST); 5236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 5336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Core algorithm state: 5436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // BlockState - Each block is either: 5536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // - PASS_THROUGH: There are neither YMM dirtying instructions nor 5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // vzeroupper instructions in this block. 
5736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this 5836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // block that will ensure that YMM is clean on exit. 5936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // - EXITS_DIRTY: An instruction in the block dirties YMM and no 6036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // subsequent vzeroupper in the block clears it. 61bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // 6236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // AddedToDirtySuccessors - This flag is raised when a block is added to the 6336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // DirtySuccessors list to ensure that it's not 6436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // added multiple times. 65bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // 6636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // FirstUnguardedCall - Records the location of the first unguarded call in 6736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // each basic block that may need to be guarded by a 6836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // vzeroupper. We won't know whether it actually needs 6936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // to be guarded until we discover a predecessor that 7036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // is DIRTY_OUT. 
7136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines struct BlockState { 7236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockState() : ExitState(PASS_THROUGH), AddedToDirtySuccessors(false) {} 7336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockExitState ExitState; 7436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool AddedToDirtySuccessors; 7536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator FirstUnguardedCall; 76bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman }; 7736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines typedef SmallVector<BlockState, 8> BlockStateMap; 7836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines typedef SmallVector<MachineBasicBlock*, 8> DirtySuccessorsWorkList; 79bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 8036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockStateMap BlockStates; 8136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DirtySuccessorsWorkList DirtySuccessors; 8236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool EverMadeChange; 8336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetInstrInfo *TII; 84bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 8536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines static char ID; 863bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes }; 8736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 883bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes char VZeroUpperInserter::ID = 0; 893bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes} 903bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 913bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso LopesFunctionPass *llvm::createX86IssueVZeroUpperPass() { 923bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes return new VZeroUpperInserter(); 933bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes} 943bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 
9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesconst char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) { 9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch (ST) { 9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case PASS_THROUGH: return "Pass-through"; 9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case EXITS_DIRTY: return "Exits-dirty"; 9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case EXITS_CLEAN: return "Exits-clean"; 10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines llvm_unreachable("Invalid block exit state."); 102e3809eed34f000581a464689596eefde2a6d1f24Elena Demikhovsky} 103bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 10436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic bool isYmmReg(unsigned Reg) { 10536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return (Reg >= X86::YMM0 && Reg <= X86::YMM15); 106bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman} 107bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 108bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedmanstatic bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { 109bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(), 110bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman E = MRI.livein_end(); I != E; ++I) 11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isYmmReg(I->first)) 112bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman return true; 113bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 114bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman return false; 115bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman} 116bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 117d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovskystatic bool clobbersAllYmmRegs(const MachineOperand &MO) { 11836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned reg = 
X86::YMM0; reg <= X86::YMM15; ++reg) { 119d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky if (!MO.clobbersPhysReg(reg)) 120d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky return false; 121d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky } 122d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky return true; 123d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky} 124d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky 125bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedmanstatic bool hasYmmReg(MachineInstr *MI) { 126df8de92083e9cc97999e9f2f7bc7ef1df9ac6258Craig Topper for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 127bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman const MachineOperand &MO = MI->getOperand(i); 128d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO)) 129d29804f80d1cc26ea552b58693ce883f5b13de7aElena Demikhovsky return true; 130bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman if (!MO.isReg()) 131bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman continue; 132bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman if (MO.isDebug()) 133bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman continue; 134bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman if (isYmmReg(MO.getReg())) 135bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman return true; 136bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman } 137bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman return false; 138bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman} 139bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 1402990853ea8bf4888b179ac6c493836b83769e87bBill Wendling/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this 1412990853ea8bf4888b179ac6c493836b83769e87bBill Wendling/// instruction. 
14236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic bool callClobbersAnyYmmReg(MachineInstr *MI) { 14336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines assert(MI->isCall() && "Can only be called on call instructions."); 1442990853ea8bf4888b179ac6c493836b83769e87bBill Wendling for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1452990853ea8bf4888b179ac6c493836b83769e87bBill Wendling const MachineOperand &MO = MI->getOperand(i); 1462990853ea8bf4888b179ac6c493836b83769e87bBill Wendling if (!MO.isRegMask()) 1472990853ea8bf4888b179ac6c493836b83769e87bBill Wendling continue; 14836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) { 1492990853ea8bf4888b179ac6c493836b83769e87bBill Wendling if (MO.clobbersPhysReg(reg)) 1502990853ea8bf4888b179ac6c493836b83769e87bBill Wendling return true; 1512990853ea8bf4888b179ac6c493836b83769e87bBill Wendling } 1522990853ea8bf4888b179ac6c493836b83769e87bBill Wendling } 1532990853ea8bf4888b179ac6c493836b83769e87bBill Wendling return false; 1542990853ea8bf4888b179ac6c493836b83769e87bBill Wendling} 1552990853ea8bf4888b179ac6c493836b83769e87bBill Wendling 15636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// Insert a vzeroupper instruction before I. 
15736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I, 15836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock &MBB) { 15936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DebugLoc dl = I->getDebugLoc(); 16036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BuildMI(MBB, I, dl, TII->get(X86::VZEROUPPER)); 16136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NumVZU; 16236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EverMadeChange = true; 16336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 1643bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 16536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// Add MBB to the DirtySuccessors list if it hasn't already been added. 16636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) { 16736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) { 16836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DirtySuccessors.push_back(&MBB); 16936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true; 1703bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes } 1713bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes} 1723bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 1733bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// processBasicBlock - Loop over all of the instructions in the basic block, 1743bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// inserting vzero upper instructions before function calls. 
17536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) { 176bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 17736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Start by assuming that the block PASS_THROUGH, which implies no unguarded 17836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // calls. 17936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockExitState CurState = PASS_THROUGH; 18036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end(); 181bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 18236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { 183531f025361555e7a695eb559ec02645c054ee146Michael Liao MachineInstr *MI = I; 1845a96b3dad2f634c9081c8b2b6c2575441dc5a2bdEvan Cheng bool isControlFlow = MI->isCall() || MI->isReturn(); 185bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 186a20e1e7ef596842127794372244fd5c646f71296Chad Rosier // Shortcut: don't need to check regular instructions in dirty state. 18736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!isControlFlow && CurState == EXITS_DIRTY) 188bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman continue; 189bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 190bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman if (hasYmmReg(MI)) { 191bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // We found a ymm-using instruction; this could be an AVX instruction, 192bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // or it could be control flow. 
19336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurState = EXITS_DIRTY; 194bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman continue; 195bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman } 1963bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 197bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // Check for control-flow out of the current function (which might 198bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // indirectly execute SSE instructions). 199bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman if (!isControlFlow) 200bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman continue; 201bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 2022990853ea8bf4888b179ac6c493836b83769e87bBill Wendling // If the call won't clobber any YMM register, skip it as well. It usually 2032990853ea8bf4888b179ac6c493836b83769e87bBill Wendling // happens on helper function calls (such as '_chkstk', '_ftol2') where 2042990853ea8bf4888b179ac6c493836b83769e87bBill Wendling // standard calling convention is not used (RegMask is not used to mark 2052990853ea8bf4888b179ac6c493836b83769e87bBill Wendling // register clobbered and register usage (def/imp-def/use) is well-dfined 2062990853ea8bf4888b179ac6c493836b83769e87bBill Wendling // and explicitly specified. 20736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MI->isCall() && !callClobbersAnyYmmReg(MI)) 2082990853ea8bf4888b179ac6c493836b83769e87bBill Wendling continue; 2092990853ea8bf4888b179ac6c493836b83769e87bBill Wendling 210bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX 211bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // registers. This instruction has zero latency. 
In addition, the processor 212bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // changes back to Clean state, after which execution of Intel SSE 213bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // instructions or Intel AVX instructions has no transition penalty. Add 214bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // the VZEROUPPER instruction before any function call/return that might 215bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // execute SSE code. 216bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // FIXME: In some cases, we may want to move the VZEROUPPER into a 217bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // predecessor block. 21836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (CurState == EXITS_DIRTY) { 219bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // After the inserted VZEROUPPER the state becomes clean again, but 220bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // other YMM may appear before other subsequent calls or even before 221bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman // the end of the BB. 22236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines insertVZeroUpper(I, MBB); 22336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurState = EXITS_CLEAN; 22436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else if (CurState == PASS_THROUGH) { 22536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If this block is currently in pass-through state and we encounter a 22636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // call then whether we need a vzeroupper or not depends on whether this 22736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // block has successors that exit dirty. Record the location of the call, 22836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet. 22936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // It will be inserted later if necessary. 
23036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockStates[MBB.getNumber()].FirstUnguardedCall = I; 23136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CurState = EXITS_CLEAN; 2323bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes } 2333bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes } 2343bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes 23536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "MBB #" << MBB.getNumber() << " exit state: " 23636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines << getBlockExitStateName(CurState) << '\n'); 23736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 23836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (CurState == EXITS_DIRTY) 23936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), 24036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SE = MBB.succ_end(); 24136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SI != SE; ++SI) 24236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines addDirtySuccessor(**SI); 24336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 24436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockStates[MBB.getNumber()].ExitState = CurState; 24536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 24636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 24736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// runOnMachineFunction - Loop over all of the basic blocks, inserting 24836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// vzero upper instructions before function calls. 
24936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesbool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { 250dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>(); 251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!ST.hasAVX() || ST.hasAVX512()) 25236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 25336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TII = MF.getTarget().getInstrInfo(); 25436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineRegisterInfo &MRI = MF.getRegInfo(); 25536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EverMadeChange = false; 256bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 25736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Fast check: if the function doesn't use any ymm registers, we don't need 25836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // to insert any VZEROUPPER instructions. This is constant-time, so it is 25936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // cheap in the common case of no ymm use. 
26036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool YMMUsed = false; 26136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *RC = &X86::VR256RegClass; 26236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); 26336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines i != e; i++) { 26436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!MRI.reg_nodbg_empty(*i)) { 26536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines YMMUsed = true; 26636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 26736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 26836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 26936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!YMMUsed) { 27036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 27136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 272bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 27336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines assert(BlockStates.empty() && DirtySuccessors.empty() && 27436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines "X86VZeroUpper state should be clear"); 27536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockStates.resize(MF.getNumBlockIDs()); 27636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 27736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Process all blocks. This will compute block exit states, record the first 27836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // unguarded call in each block, and add successors of dirty blocks to the 27936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // DirtySuccessors list. 
28036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) 28136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines processBasicBlock(*I); 28236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 28336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If any YMM regs are live in to this function, add the entry block to the 28436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // DirtySuccessors list 28536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (checkFnHasLiveInYmm(MRI)) 28636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines addDirtySuccessor(MF.front()); 28736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 28836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Re-visit all blocks that are successors of EXITS_DIRTY bsocks. Add 28936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY 29036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // through PASS_THROUGH blocks. 29136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines while (!DirtySuccessors.empty()) { 29236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock &MBB = *DirtySuccessors.back(); 29336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DirtySuccessors.pop_back(); 29436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockState &BBState = BlockStates[MBB.getNumber()]; 29536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 29636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // MBB is a successor of a dirty block, so its first call needs to be 29736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // guarded. 
29836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (BBState.FirstUnguardedCall != MBB.end()) 29936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines insertVZeroUpper(BBState.FirstUnguardedCall, MBB); 30036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 30136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If this successor was a pass-through block then it is now dirty, and its 30236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // successors need to be added to the worklist (if they haven't been 30336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // already). 30436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (BBState.ExitState == PASS_THROUGH) { 30536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "MBB #" << MBB.getNumber() 30636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines << " was Pass-through, is now Dirty-out.\n"); 30736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), 30836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SE = MBB.succ_end(); 30936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SI != SE; ++SI) 31036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines addDirtySuccessor(**SI); 31136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 31236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 313bd00a934c653fb1666fa7d18267644b4e9d14e5eEli Friedman 31436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BlockStates.clear(); 31536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return EverMadeChange; 3163bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes} 317