1//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimiztions -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// In NVPTX, NVPTXFrameLowering will emit following instruction at the beginning 11// of a MachineFunction. 12// 13// mov %SPL, %depot 14// cvta.local %SP, %SPL 15// 16// Because Frame Index is a generic address and alloca can only return generic 17// pointer, without this pass the instructions producing alloca'ed address will 18// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on 19// this address with their .local versions, but this may introduce a lot of 20// cvta.to.local instructions. Performance can be improved if we avoid casting 21// address back and forth and directly calculate local address based on %SPL. 22// This peephole pass optimizes these cases, for example 23// 24// It will transform the following pattern 25// %vreg0<def> = LEA_ADDRi64 %VRFrame, 4 26// %vreg1<def> = cvta_to_local_yes_64 %vreg0 27// 28// into 29// %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4 30// 31// %VRFrameLocal is the virtual register name of %SPL 32// 33//===----------------------------------------------------------------------===// 34 35#include "NVPTX.h" 36#include "llvm/CodeGen/MachineFunctionPass.h" 37#include "llvm/CodeGen/MachineInstrBuilder.h" 38#include "llvm/CodeGen/MachineRegisterInfo.h" 39#include "llvm/Target/TargetRegisterInfo.h" 40#include "llvm/Target/TargetInstrInfo.h" 41 42using namespace llvm; 43 44#define DEBUG_TYPE "nvptx-peephole" 45 46namespace llvm { 47void initializeNVPTXPeepholePass(PassRegistry &); 48} 49 50namespace { 51struct NVPTXPeephole : public MachineFunctionPass { 52 public: 53 static char ID; 54 NVPTXPeephole() : MachineFunctionPass(ID) { 55 initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry()); 56 } 57 58 bool runOnMachineFunction(MachineFunction &MF) override; 59 60 const char *getPassName() const override { 61 return "NVPTX optimize redundant cvta.to.local instruction"; 62 } 63 64 void getAnalysisUsage(AnalysisUsage &AU) const override { 65 MachineFunctionPass::getAnalysisUsage(AU); 66 } 67}; 68} 69 70char NVPTXPeephole::ID = 0; 71 72INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false) 73 74static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) { 75 auto &MBB = *Root.getParent(); 76 auto &MF = *MBB.getParent(); 77 // Check current instruction is cvta.to.local 78 if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 && 79 Root.getOpcode() != NVPTX::cvta_to_local_yes) 80 return false; 81 82 auto &Op = Root.getOperand(1); 83 const auto &MRI = MF.getRegInfo(); 84 MachineInstr *GenericAddrDef = nullptr; 85 if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) { 86 GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg()); 87 } 88 89 // Check the register operand is uniquely defined by LEA_ADDRi instruction 90 if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB || 91 (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 && 92 GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) { 93 return false; 94 } 95 96 // Check the LEA_ADDRi operand is Frame index 97 auto &BaseAddrOp = GenericAddrDef->getOperand(1); 98 if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) { 99 return true; 100 } 101 102 return false; 103} 104 105static void CombineCVTAToLocal(MachineInstr &Root) { 106 auto &MBB = *Root.getParent(); 107 auto &MF = *MBB.getParent(); 108 const auto &MRI = MF.getRegInfo(); 109 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 110 auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); 111 112 MachineInstrBuilder MIB = 113 BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()), 114 Root.getOperand(0).getReg()) 115 .addReg(NVPTX::VRFrameLocal) 116 .addOperand(Prev.getOperand(2)); 117 118 MBB.insert((MachineBasicBlock::iterator)&Root, MIB); 119 120 // Check if MRI has only one non dbg use, which is Root 121 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) { 122 Prev.eraseFromParentAndMarkDBGValuesForRemoval(); 123 } 124 Root.eraseFromParentAndMarkDBGValuesForRemoval(); 125} 126 127bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) { 128 if (skipFunction(*MF.getFunction())) 129 return false; 130 131 bool Changed = false; 132 // Loop over all of the basic blocks. 133 for (auto &MBB : MF) { 134 // Traverse the basic block. 135 auto BlockIter = MBB.begin(); 136 137 while (BlockIter != MBB.end()) { 138 auto &MI = *BlockIter++; 139 if (isCVTAToLocalCombinationCandidate(MI)) { 140 CombineCVTAToLocal(MI); 141 Changed = true; 142 } 143 } // Instruction 144 } // Basic Block 145 146 // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal 147 const auto &MRI = MF.getRegInfo(); 148 if (MRI.use_empty(NVPTX::VRFrame)) { 149 if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) { 150 MI->eraseFromParentAndMarkDBGValuesForRemoval(); 151 } 152 } 153 154 return Changed; 155} 156 157MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); } 158