//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// In NVPTX, NVPTXFrameLowering emits the following instructions at the
// beginning of a MachineFunction.
12//
13//   mov %SPL, %depot
14//   cvta.local %SP, %SPL
15//
// Because a frame index is a generic address and alloca can only return a
// generic pointer, without this pass the instructions producing an alloca'ed
// address will be based on %SP. NVPTXLowerAlloca tends to help replace stores
// and loads on this address with their .local versions, but this may introduce
// a lot of cvta.to.local instructions. Performance can be improved if we avoid
// casting the address back and forth and directly calculate the local address
// based on %SPL. This peephole pass optimizes these cases.
//
// For example, it will transform the following pattern
25//    %vreg0<def> = LEA_ADDRi64 %VRFrame, 4
26//    %vreg1<def> = cvta_to_local_yes_64 %vreg0
27//
28// into
29//    %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4
30//
31// %VRFrameLocal is the virtual register name of %SPL
32//
33//===----------------------------------------------------------------------===//
34
35#include "NVPTX.h"
36#include "llvm/CodeGen/MachineFunctionPass.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/Target/TargetRegisterInfo.h"
40#include "llvm/Target/TargetInstrInfo.h"
41
42using namespace llvm;
43
44#define DEBUG_TYPE "nvptx-peephole"
45
46namespace llvm {
47void initializeNVPTXPeepholePass(PassRegistry &);
48}
49
50namespace {
51struct NVPTXPeephole : public MachineFunctionPass {
52 public:
53  static char ID;
54  NVPTXPeephole() : MachineFunctionPass(ID) {
55    initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
56  }
57
58  bool runOnMachineFunction(MachineFunction &MF) override;
59
60  const char *getPassName() const override {
61    return "NVPTX optimize redundant cvta.to.local instruction";
62  }
63
64  void getAnalysisUsage(AnalysisUsage &AU) const override {
65    MachineFunctionPass::getAnalysisUsage(AU);
66  }
67};
68}
69
70char NVPTXPeephole::ID = 0;
71
72INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
73
74static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
75  auto &MBB = *Root.getParent();
76  auto &MF = *MBB.getParent();
77  // Check current instruction is cvta.to.local
78  if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
79      Root.getOpcode() != NVPTX::cvta_to_local_yes)
80    return false;
81
82  auto &Op = Root.getOperand(1);
83  const auto &MRI = MF.getRegInfo();
84  MachineInstr *GenericAddrDef = nullptr;
85  if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
86    GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
87  }
88
89  // Check the register operand is uniquely defined by LEA_ADDRi instruction
90  if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
91      (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
92       GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
93    return false;
94  }
95
96  // Check the LEA_ADDRi operand is Frame index
97  auto &BaseAddrOp = GenericAddrDef->getOperand(1);
98  if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
99    return true;
100  }
101
102  return false;
103}
104
105static void CombineCVTAToLocal(MachineInstr &Root) {
106  auto &MBB = *Root.getParent();
107  auto &MF = *MBB.getParent();
108  const auto &MRI = MF.getRegInfo();
109  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
110  auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
111
112  MachineInstrBuilder MIB =
113      BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
114              Root.getOperand(0).getReg())
115          .addReg(NVPTX::VRFrameLocal)
116          .addOperand(Prev.getOperand(2));
117
118  MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
119
120  // Check if MRI has only one non dbg use, which is Root
121  if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
122    Prev.eraseFromParentAndMarkDBGValuesForRemoval();
123  }
124  Root.eraseFromParentAndMarkDBGValuesForRemoval();
125}
126
127bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
128  if (skipFunction(*MF.getFunction()))
129    return false;
130
131  bool Changed = false;
132  // Loop over all of the basic blocks.
133  for (auto &MBB : MF) {
134    // Traverse the basic block.
135    auto BlockIter = MBB.begin();
136
137    while (BlockIter != MBB.end()) {
138      auto &MI = *BlockIter++;
139      if (isCVTAToLocalCombinationCandidate(MI)) {
140        CombineCVTAToLocal(MI);
141        Changed = true;
142      }
143    }  // Instruction
144  }    // Basic Block
145
146  // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
147  const auto &MRI = MF.getRegInfo();
148  if (MRI.use_empty(NVPTX::VRFrame)) {
149    if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
150      MI->eraseFromParentAndMarkDBGValuesForRemoval();
151    }
152  }
153
154  return Changed;
155}
156
157MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
158