X86VZeroUpper.cpp revision 3bde6fe0df05558b89e7edfe48ac05da59beb81a
13bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===//
23bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes//
33bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes//                     The LLVM Compiler Infrastructure
43bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes//
53bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes// This file is distributed under the University of Illinois Open Source
63bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes// License. See LICENSE.TXT for details.
73bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes//
83bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes//===----------------------------------------------------------------------===//
93bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids the transition latency
// penalty when transferring control between AVX encoded instructions and the
// old SSE encoding mode.
143bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes//
153bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes//===----------------------------------------------------------------------===//
163bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
173bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#define DEBUG_TYPE "x86-codegen"
183bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "X86.h"
193bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "X86InstrInfo.h"
203bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/ADT/Statistic.h"
213bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/MachineFunctionPass.h"
223bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/MachineInstrBuilder.h"
233bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/CodeGen/Passes.h"
243bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/GlobalValue.h"
253bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes#include "llvm/Target/TargetInstrInfo.h"
263bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesusing namespace llvm;
273bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
283bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso LopesSTATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
293bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
303bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesnamespace {
313bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  struct VZeroUpperInserter : public MachineFunctionPass {
323bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    static char ID;
333bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    VZeroUpperInserter() : MachineFunctionPass(ID) {}
343bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
353bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    virtual bool runOnMachineFunction(MachineFunction &MF);
363bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
373bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
383bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
393bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    virtual const char *getPassName() const { return "X86 vzeroupper inserter";}
403bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
413bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  private:
423bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    const TargetInstrInfo *TII; // Machine instruction info.
433bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    MachineBasicBlock *MBB;     // Current basic block
443bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  };
453bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  char VZeroUpperInserter::ID = 0;
463bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
473bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
483bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso LopesFunctionPass *llvm::createX86IssueVZeroUpperPass() {
493bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  return new VZeroUpperInserter();
503bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
513bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
523bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// runOnMachineFunction - Loop over all of the basic blocks, inserting
533bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// vzero upper instructions before function calls.
543bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesbool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
553bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  TII = MF.getTarget().getInstrInfo();
563bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  bool Changed = false;
573bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
583bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  // Process any unreachable blocks in arbitrary order now.
593bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
603bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    Changed |= processBasicBlock(MF, *BB);
613bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
623bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  return Changed;
633bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
643bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
653bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesbool isCallToModuleFn(const MachineInstr *MI) {
663bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  assert(MI->getDesc().isCall() && "Isn't a call instruction");
673bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
683bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
693bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    const MachineOperand &MO = MI->getOperand(i);
703bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
713bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    if (!MO.isGlobal())
723bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes      continue;
733bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
743bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    const GlobalValue *GV = MO.getGlobal();
753bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    GlobalValue::LinkageTypes LT = GV->getLinkage();
763bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    if (GV->isInternalLinkage(LT) || GV->isPrivateLinkage(LT) ||
773bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes        (GV->isExternalLinkage(LT) && !GV->isDeclaration()))
783bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes      return true;
793bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
803bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    return false;
813bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  }
823bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  return false;
833bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
843bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
853bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// processBasicBlock - Loop over all of the instructions in the basic block,
863bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes/// inserting vzero upper instructions before function calls.
873bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopesbool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
883bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes                                           MachineBasicBlock &BB) {
893bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  bool Changed = false;
903bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  MBB = &BB;
913bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
923bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
933bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    MachineInstr *MI = I;
943bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    DebugLoc dl = I->getDebugLoc();
953bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
963bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    // Insert a vzeroupper instruction before each control transfer
973bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    // to functions outside this module
983bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    if (MI->getDesc().isCall() && !isCallToModuleFn(MI)) {
993bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes      BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
1003bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes      ++NumVZU;
1013bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes    }
1023bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  }
1033bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes
1043bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes  return Changed;
1053bde6fe0df05558b89e7edfe48ac05da59beb81aBruno Cardoso Lopes}
106