X86VZeroUpper.cpp revision bd00a934c653fb1666fa7d18267644b4e9d14e5e
//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids transition latency
// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
1477349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek// 1577349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek//===----------------------------------------------------------------------===// 1677349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek 17aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek#define DEBUG_TYPE "x86-vzeroupper" 1877349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek#include "X86.h" 1977349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek#include "X86InstrInfo.h" 20b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek#include "llvm/ADT/Statistic.h" 2177349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek#include "llvm/CodeGen/MachineFunctionPass.h" 22b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek#include "llvm/CodeGen/MachineInstrBuilder.h" 23b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek#include "llvm/CodeGen/MachineRegisterInfo.h" 24b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek#include "llvm/CodeGen/Passes.h" 25e5f4dcb6bd73a10df6eb6c3cfe057c88cb2362ccTed Kremenek#include "llvm/Support/Debug.h" 26b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek#include "llvm/Support/raw_ostream.h" 27aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek#include "llvm/Target/TargetInstrInfo.h" 28b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenekusing namespace llvm; 29b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 30f116bd654bcdb5d7c22656f224deeb7a67f7d0cdTed KremenekSTATISTIC(NumVZU, "Number of vzeroupper instructions inserted"); 31aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek 32aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremeneknamespace { 33aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek struct VZeroUpperInserter : public MachineFunctionPass { 34330dddd19406f9cc227e59e0bb0a36ecdc52915eTed Kremenek static char ID; 3577349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek VZeroUpperInserter() : MachineFunctionPass(ID) {} 36b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 37b387a3f23e423d62c053be86294b703da1d1a222Ted 
Kremenek virtual bool runOnMachineFunction(MachineFunction &MF); 38b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 39b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); 40b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 41b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek virtual const char *getPassName() const { return "X86 vzeroupper inserter";} 42b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 43b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek private: 44b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek const TargetInstrInfo *TII; // Machine instruction info. 45b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek MachineBasicBlock *MBB; // Current basic block 46b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 47b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // Any YMM register live-in to this function? 48b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek bool FnHasLiveInYmm; 49b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 50b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // BBState - Contains the state of each MBB: unknown, clean, dirty 51aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek SmallVector<uint8_t, 8> BBState; 52240f1f00dda1d481276ea872fe8f8851581a7e6bTed Kremenek 53b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // BBSolved - Keep track of all MBB which had been already analyzed 54b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // and there is no further processing required. 
55aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek BitVector BBSolved; 56b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 57b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // Machine Basic Blocks are classified according this pass: 58b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // 59b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // ST_UNKNOWN - The MBB state is unknown, meaning from the entry state 60b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // until the MBB exit there isn't a instruction using YMM to change 61b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // the state to dirty, or one of the incoming predecessors is unknown 62b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // and there's not a dirty predecessor between them. 63b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // 64b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // ST_CLEAN - No YMM usage in the end of the MBB. A MBB could have 65b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // instructions using YMM and be marked ST_CLEAN, as long as the state 66affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek // is cleaned by a vzeroupper before any call. 674a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek // 684a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek // ST_DIRTY - Any MBB ending with a YMM usage not cleaned up by a 69affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek // vzeroupper instruction. 
705e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek // 711e80aa49ec689d1937e54fb353d6626e0a58f0dbTed Kremenek // ST_INIT - Placeholder for an empty state set 72affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek // 73affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek enum { 748cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek ST_UNKNOWN = 0, 758cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek ST_CLEAN = 1, 76b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek ST_DIRTY = 2, 774a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek ST_INIT = 3 784a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek }; 794a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek 80b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // computeState - Given two states, compute the resulting state, in 814a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek // the following way 824a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek // 834a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek // 1) One dirty state yields another dirty state 849dca062461a6244cf0f733346657fa3eee853f9bTed Kremenek // 2) All states must be clean for the result to be clean 85affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek // 3) If none above and one unknown, the result state is also unknown 86affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek // 87affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek unsigned computeState(unsigned PrevState, unsigned CurState) { 88affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek if (PrevState == ST_INIT) 89b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek return CurState; 9007d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek 91b5339121f63f2754d4f26e8f3a092caf9f7d9290Ted Kremenek if (PrevState == ST_DIRTY || CurState == ST_DIRTY) 9207d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek return ST_DIRTY; 9307d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek 9407d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek if (PrevState == ST_CLEAN && CurState == ST_CLEAN) 
95b5339121f63f2754d4f26e8f3a092caf9f7d9290Ted Kremenek return ST_CLEAN; 9607d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek 9707d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek return ST_UNKNOWN; 984a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek } 994a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek 10007d83aa220567bef263ef76cfc9b0159320bb640Ted Kremenek }; 1014d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek char VZeroUpperInserter::ID = 0; 1024d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek} 1034d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek 1044d839b4949efe9e2b16eeab679c25b28e31ea742Ted KremenekFunctionPass *llvm::createX86IssueVZeroUpperPass() { 1054d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek return new VZeroUpperInserter(); 1064d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek} 1074d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek 1084d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenekstatic bool isYmmReg(unsigned Reg) { 1094d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek if (Reg >= X86::YMM0 && Reg <= X86::YMM15) 110b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek return true; 1118cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek 1128cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek return false; 1138cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek} 1148cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek 1155e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenekstatic bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { 1165e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(), 1175e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek E = MRI.livein_end(); I != E; ++I) 1185e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek if (isYmmReg(I->first)) 1192ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek return true; 1202ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek 1212ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek return false; 
1222ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek} 123b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 124b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenekstatic bool hasYmmReg(MachineInstr *MI) { 125b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { 126b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek const MachineOperand &MO = MI->getOperand(i); 127b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek if (!MO.isReg()) 128b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek continue; 129b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek if (MO.isDebug()) 130240f1f00dda1d481276ea872fe8f8851581a7e6bTed Kremenek continue; 131b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek if (isYmmReg(MO.getReg())) 132b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek return true; 133b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek } 134b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek return false; 135b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek} 136b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 137b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek/// runOnMachineFunction - Loop over all of the basic blocks, inserting 138b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek/// vzero upper instructions before function calls. 
139b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenekbool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { 140b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek TII = MF.getTarget().getInstrInfo(); 141b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek MachineRegisterInfo &MRI = MF.getRegInfo(); 142b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek bool EverMadeChange = false; 143b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 144b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // Fast check: if the function doesn't use any ymm registers, we don't need 145b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // to insert any VZEROUPPER instructions. This is constant-time, so it is 146b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // cheap in the common case of no ymm use. 147b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek bool YMMUsed = false; 148b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek TargetRegisterClass *RC = X86::VR256RegisterClass; 149b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); 150e01c98767dfd7153c3c84637c36659e3bbe16ff7Ted Kremenek i != e; i++) { 151e01c98767dfd7153c3c84637c36659e3bbe16ff7Ted Kremenek if (MRI.isPhysRegUsed(*i)) { 152ffe0f43806d4823271c2406c1fccc2373115c36aTed Kremenek YMMUsed = true; 153e01c98767dfd7153c3c84637c36659e3bbe16ff7Ted Kremenek break; 154b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek } 155b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek } 156aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek if (!YMMUsed) 157b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek return EverMadeChange; 1584a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek 1594a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek // Pre-compute the existence of any live-in YMM registers to this function 160b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek FnHasLiveInYmm = checkFnHasLiveInYmm(MRI); 
161b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 1624a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek assert(BBState.empty()); 1634a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek BBState.resize(MF.getNumBlockIDs(), 0); 1649dca062461a6244cf0f733346657fa3eee853f9bTed Kremenek BBSolved.resize(MF.getNumBlockIDs(), 0); 1659dca062461a6244cf0f733346657fa3eee853f9bTed Kremenek 166b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // Each BB state depends on all predecessors, loop over until everything 167b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // converges. (Once we converge, we can implicitly mark everything that is 168b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // still ST_UNKNOWN as ST_CLEAN.) 169b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek while (1) { 170b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek bool MadeChange = false; 171b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 172b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // Process all basic blocks. 173b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) 1744a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek MadeChange |= processBasicBlock(MF, *I); 1754a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek 176b5339121f63f2754d4f26e8f3a092caf9f7d9290Ted Kremenek // If this iteration over the code changed anything, keep iterating. 
177b5339121f63f2754d4f26e8f3a092caf9f7d9290Ted Kremenek if (!MadeChange) break; 1784d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek EverMadeChange = true; 1794d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek } 1804d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek 1814d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek BBState.clear(); 1824d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek BBSolved.clear(); 1834d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek return EverMadeChange; 184d763eb91aab5bdecd11825fadb35d6d8cc905f63Ted Kremenek} 185d763eb91aab5bdecd11825fadb35d6d8cc905f63Ted Kremenek 186affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek/// processBasicBlock - Loop over all of the instructions in the basic block, 187affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenek/// inserting vzero upper instructions before function calls. 188affb2159712b2373a18a89ed205c1a309d3aec12Ted Kremenekbool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, 1898cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek MachineBasicBlock &BB) { 1908cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek bool Changed = false; 1918cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek unsigned BBNum = BB.getNumber(); 1928cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek MBB = &BB; 1938cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek 1945e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek // Don't process already solved BBs 1955e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek if (BBSolved[BBNum]) 1965e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek return false; // No changes 1975e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek 1982ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek // Check the state of all predecessors 1991e80aa49ec689d1937e54fb353d6626e0a58f0dbTed Kremenek unsigned EntryState = ST_INIT; 2001e80aa49ec689d1937e54fb353d6626e0a58f0dbTed Kremenek for (MachineBasicBlock::const_pred_iterator PI = BB.pred_begin(), 
2012ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek PE = BB.pred_end(); PI != PE; ++PI) { 2022ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek EntryState = computeState(EntryState, BBState[(*PI)->getNumber()]); 203d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek if (EntryState == ST_DIRTY) 204d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek break; 205d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek } 206d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek 2074a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek 2084a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek // The entry MBB for the function may set the inital state to dirty if 2094a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek // the function receives any YMM incoming arguments 210d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek if (MBB == MF.begin()) { 211d87a321a3c3902f7acfc6539b8946a00da6e45ccTed Kremenek EntryState = ST_CLEAN; 2124d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek if (FnHasLiveInYmm) 2134d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek EntryState = ST_DIRTY; 2144d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek } 2154d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek 2164d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek // The current state is initialized according to the predecessors 2174d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek unsigned CurState = EntryState; 2184d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek bool BBHasCall = false; 2194d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek 2204d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { 2214d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek MachineInstr *MI = I; 2224d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek DebugLoc dl = I->getDebugLoc(); 2234d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek bool isControlFlow = MI->getDesc().isCall() || MI->getDesc().isReturn(); 
2244d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek 2254d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek // Shortcut: don't need to check regular instructions in dirty state. 2264d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek if (!isControlFlow && CurState == ST_DIRTY) 2274d839b4949efe9e2b16eeab679c25b28e31ea742Ted Kremenek continue; 228b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 2298cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek if (hasYmmReg(MI)) { 2308cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek // We found a ymm-using instruction; this could be an AVX instruction, 2318cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek // or it could be control flow. 2325e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek CurState = ST_DIRTY; 2335e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek continue; 2345e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek } 2355e03fcb5420c33207433dd6f800588e256dd9bdbTed Kremenek 2368cc13ea74fea1c04042a2f4087665bc5182e8408Ted Kremenek // Check for control-flow out of the current function (which might 2372ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek // indirectly execute SSE instructions). 2382ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek if (!isControlFlow) 2392ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek continue; 2402ded35a576e3899553ea0ccfcbf5cbdb3d8cf664Ted Kremenek 241b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek BBHasCall = true; 2426a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek 243b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX 244b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // registers. This instruction has zero latency. 
In addition, the processor 2456a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek // changes back to Clean state, after which execution of Intel SSE 2466a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek // instructions or Intel AVX instructions has no transition penalty. Add 2476a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek // the VZEROUPPER instruction before any function call/return that might 2486a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek // execute SSE code. 2496a6719a3a11087b48d9f1a4eb08b3bd43cb05a65Ted Kremenek // FIXME: In some cases, we may want to move the VZEROUPPER into a 250b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // predecessor block. 251b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek if (CurState == ST_DIRTY) { 252b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // Only insert the VZEROUPPER in case the entry state isn't unknown. 253b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // When unknown, only compute the information within the block to have 254b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // it available in the exit if possible, but don't change the block. 255b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek if (EntryState != ST_UNKNOWN) { 256b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER)); 257b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek ++NumVZU; 258b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek } 259b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 26005a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek // After the inserted VZEROUPPER the state becomes clean again, but 26105a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek // other YMM may appear before other subsequent calls or even before 26205a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek // the end of the BB. 
263b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek CurState = ST_CLEAN; 264b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek } 265b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek } 266b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 267b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek DEBUG(dbgs() << "MBB #" << BBNum 268aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek << ", current state: " << CurState << '\n'); 269b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek 270b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // A BB can only be considered solved when we both have done all the 271b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // necessary transformations, and have computed the exit state. This happens 272aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek // in two cases: 273b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // 1) We know the entry state: this immediately implies the exit state and 274aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek // all the necessary transformations. 275aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek // 2) There are no calls, and and a non-call instruction marks this block: 276b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek // no transformations are necessary, and we know the exit state. 
27705a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek if (EntryState != ST_UNKNOWN || (!BBHasCall && CurState != ST_UNKNOWN)) 278aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek BBSolved[BBNum] = true; 2795a7b3821c6abed7f58a53a94eac128bd23d23289Ted Kremenek 28005a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek if (CurState != BBState[BBNum]) 281b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek Changed = true; 282aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek 283aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek BBState[BBNum] = CurState; 284aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek return Changed; 285aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek} 286aed9b6ac2ed0013133e4d4aebf2fad2ccd27f3e6Ted Kremenek